Exemplo n.º 1
0
    def __init__(self, sourceFolder, givenName, encoding='utf-8'):
        """
        Constructor: just sets up the VerseView Bible object.
        """
        # Setup and initialise the base class first
        Bible.__init__(self)
        self.objectNameString = 'VerseView XML Bible object'
        self.objectTypeString = 'VerseView'

        # Now we can set our object variables
        self.sourceFolder, self.givenName, self.encoding = sourceFolder, givenName, encoding
        self.sourceFilepath = os.path.join(self.sourceFolder, self.givenName)

        self.XMLTree = self.header = None  # Will hold the XML data

        # Get the data tables that we need for proper checking
        #self.ISOLanguages = ISO_639_3_Languages().loadData()
        self.genericBOS = BibleOrganisationalSystem('GENERIC-KJV-66-ENG')

        # Do a preliminary check on the readability of our file
        if not os.access(self.sourceFilepath, os.R_OK):
            vPrint(
                'Quiet', debuggingThisModule,
                "VerseViewXMLBible: File {!r} is unreadable".format(
                    self.sourceFilepath))

        self.name = self.givenName
Exemplo n.º 2
0
    def preload(self):
        """
        Load the metadata from the SQLite3 database.
        """
        fnPrint(debuggingThisModule, "preload()…")

        vPrint('Info', debuggingThisModule,
               _("Preloading {}…").format(self.sourceFilepath))

        fileExtensionUpper = self.fileExtension.upper()
        if fileExtensionUpper not in FILENAME_ENDINGS_TO_ACCEPT:
            logging.critical("{} doesn't appear to be a MySword file".format(
                self.sourceFilename))
        elif not self.sourceFilename.upper().endswith(
                BIBLE_FILENAME_ENDINGS_TO_ACCEPT[0]):
            logging.critical(
                "{} doesn't appear to be a MySword Bible file".format(
                    self.sourceFilename))

        connection = sqlite3.connect(self.sourceFilepath)
        connection.row_factory = sqlite3.Row  # Enable row names
        self.cursor = connection.cursor()

        # First get the settings
        if self.suppliedMetadata is None: self.suppliedMetadata = {}
        self.suppliedMetadata['MySword'] = {}
        self.cursor.execute('select * from Details')
        row = self.cursor.fetchone()
        for key in row.keys():
            self.suppliedMetadata['MySword'][key] = row[key]
        #dPrint( 'Quiet', debuggingThisModule, self.suppliedMetadata['MySword'] ); halt
        #if 'Description' in self.settingsDict and len(self.settingsDict['Description'])<40: self.name = self.settingsDict['Description']
        #if 'Abbreviation' in self.settingsDict: self.abbreviation = self.settingsDict['Abbreviation']
        if 'encryption' in self.suppliedMetadata['MySword']:
            logging.critical("{} is encrypted: level {}".format(
                self.sourceFilename,
                self.suppliedMetadata['MySword']['encryption']))

        self.BibleOrganisationalSystem = BibleOrganisationalSystem(
            'GENERIC-KJV-66-ENG')

        self.preloadDone = True
Exemplo n.º 3
0
class VerseViewXMLBible(Bible):
    """
    Class for reading, validating, and converting VerseViewXMLBible XML.
    """
    XMLNameSpace = "{http://www.w3.org/2001/XMLSchema-instance}"
    treeTag = 'bible'
    filenameTag = 'fname'
    revisionTag = 'revision'
    titleTag = 'title'
    fontTag = 'font'
    copyrightTag = 'copyright'
    sizefactorTag = 'sizefactor'
    bookTag = 'b'
    chapterTag = 'c'
    verseTag = 'v'

    def __init__(self, sourceFolder, givenName, encoding='utf-8'):
        """
        Constructor: just sets up the VerseView Bible object.
        """
        # Setup and initialise the base class first
        Bible.__init__(self)
        self.objectNameString = 'VerseView XML Bible object'
        self.objectTypeString = 'VerseView'

        # Now we can set our object variables
        self.sourceFolder, self.givenName, self.encoding = sourceFolder, givenName, encoding
        self.sourceFilepath = os.path.join(self.sourceFolder, self.givenName)

        self.XMLTree = self.header = None  # Will hold the XML data

        # Get the data tables that we need for proper checking
        #self.ISOLanguages = ISO_639_3_Languages().loadData()
        self.genericBOS = BibleOrganisationalSystem('GENERIC-KJV-66-ENG')

        # Do a preliminary check on the readability of our file
        if not os.access(self.sourceFilepath, os.R_OK):
            vPrint(
                'Quiet', debuggingThisModule,
                "VerseViewXMLBible: File {!r} is unreadable".format(
                    self.sourceFilepath))

        self.name = self.givenName
        #if self.name is None:
        #pass

    # end of VerseViewXMLBible.__init__

    def load(self):
        """
        Load a single source XML file and load book elements.
        """
        vPrint('Info', debuggingThisModule,
               _("Loading {}…").format(self.sourceFilepath))
        self.XMLTree = ElementTree().parse(self.sourceFilepath)
        if BibleOrgSysGlobals.debugFlag:
            assert self.XMLTree  # Fail here if we didn't load anything at all

        if self.suppliedMetadata is None: self.suppliedMetadata = {}
        self.suppliedMetadata['VerseView'] = {}

        # Find the main (bible) container
        if self.XMLTree.tag == VerseViewXMLBible.treeTag:
            location = "VerseView XML file"
            BibleOrgSysGlobals.checkXMLNoText(self.XMLTree, location, '4f6h')
            BibleOrgSysGlobals.checkXMLNoAttributes(self.XMLTree, location,
                                                    'js24')
            BibleOrgSysGlobals.checkXMLNoTail(self.XMLTree, location, '1wk8')

            # Find the submain (various info and then book) containers
            bookNumber = 0
            for element in self.XMLTree:
                if element.tag == VerseViewXMLBible.filenameTag:
                    sublocation = "filename in " + location
                    BibleOrgSysGlobals.checkXMLNoAttributes(
                        element, sublocation, 'jk86')
                    BibleOrgSysGlobals.checkXMLNoSubelements(
                        element, sublocation, 'hjk7')
                    BibleOrgSysGlobals.checkXMLNoTail(element, sublocation,
                                                      'bh09')
                    #self.filename = element.text
                elif element.tag == VerseViewXMLBible.revisionTag:
                    sublocation = "revision in " + location
                    BibleOrgSysGlobals.checkXMLNoAttributes(
                        element, sublocation, 'jk86')
                    BibleOrgSysGlobals.checkXMLNoSubelements(
                        element, sublocation, 'hjk7')
                    BibleOrgSysGlobals.checkXMLNoTail(element, sublocation,
                                                      'bh09')
                    self.suppliedMetadata['VerseView'][
                        'Revision'] = element.text
                elif element.tag == VerseViewXMLBible.titleTag:
                    sublocation = "title in " + location
                    BibleOrgSysGlobals.checkXMLNoAttributes(
                        element, sublocation, 'jk86')
                    BibleOrgSysGlobals.checkXMLNoSubelements(
                        element, sublocation, 'hjk7')
                    BibleOrgSysGlobals.checkXMLNoTail(element, sublocation,
                                                      'bh09')
                    self.suppliedMetadata['VerseView']['Title'] = element.text
                elif element.tag == VerseViewXMLBible.fontTag:
                    sublocation = "font in " + location
                    BibleOrgSysGlobals.checkXMLNoAttributes(
                        element, sublocation, 'jk86')
                    BibleOrgSysGlobals.checkXMLNoSubelements(
                        element, sublocation, 'hjk7')
                    BibleOrgSysGlobals.checkXMLNoTail(element, sublocation,
                                                      'bh09')
                    self.suppliedMetadata['VerseView']['Font'] = element.text
                elif element.tag == VerseViewXMLBible.copyrightTag:
                    sublocation = "copyright in " + location
                    BibleOrgSysGlobals.checkXMLNoAttributes(
                        element, sublocation, 'jk86')
                    BibleOrgSysGlobals.checkXMLNoSubelements(
                        element, sublocation, 'hjk7')
                    BibleOrgSysGlobals.checkXMLNoTail(element, sublocation,
                                                      'bh09')
                    self.suppliedMetadata['VerseView'][
                        'Copyright'] = element.text
                elif element.tag == VerseViewXMLBible.sizefactorTag:
                    sublocation = "sizefactor in " + location
                    BibleOrgSysGlobals.checkXMLNoAttributes(
                        element, sublocation, 'jk86')
                    BibleOrgSysGlobals.checkXMLNoSubelements(
                        element, sublocation, 'hjk7')
                    BibleOrgSysGlobals.checkXMLNoTail(element, sublocation,
                                                      'bh09')
                    if BibleOrgSysGlobals.debugFlag: assert element.text == '1'
                elif element.tag == VerseViewXMLBible.bookTag:
                    sublocation = "book in " + location
                    BibleOrgSysGlobals.checkXMLNoText(element, sublocation,
                                                      'g3g5')
                    BibleOrgSysGlobals.checkXMLNoTail(element, sublocation,
                                                      'd3f6')
                    bookNumber += 1
                    self.__validateAndExtractBook(element, bookNumber)
                else:
                    logging.error(
                        "xk15 Expected to find {!r} but got {!r}".format(
                            VerseViewXMLBible.bookTag, element.tag))
        else:
            logging.error("Expected to load {!r} but got {!r}".format(
                VerseViewXMLBible.treeTag, self.XMLTree.tag))

        if BibleOrgSysGlobals.debugFlag or BibleOrgSysGlobals.verbosityLevel > 2:
            # These are all compulsory so they should all exist
            #vPrint( 'Quiet', debuggingThisModule, "Filename is {!r}".format( self.filename ) )
            vPrint(
                'Quiet', debuggingThisModule, "Revision is {!r}".format(
                    self.suppliedMetadata['VerseView']['Revision']))
            vPrint(
                'Quiet', debuggingThisModule, "Title is {!r}".format(
                    self.suppliedMetadata['VerseView']['Title']))
            vPrint(
                'Quiet', debuggingThisModule, "Font is {!r}".format(
                    self.suppliedMetadata['VerseView']['Font']))
            vPrint(
                'Quiet', debuggingThisModule, "Copyright is {!r}".format(
                    self.suppliedMetadata['VerseView']['Copyright']))
            #vPrint( 'Quiet', debuggingThisModule, "SizeFactor is {!r}".format( self.sizeFactor ) )

        self.applySuppliedMetadata(
            'VerseView')  # Copy some to self.settingsDict
        self.doPostLoadProcessing()

    # end of VerseViewXMLBible.load

    def __validateAndExtractBook(self, book, bookNumber):
        """
        Check/validate and extract book data from the given XML book record
            finding chapter subelements.
        """

        vPrint('Verbose', debuggingThisModule, _("Validating XML book…"))

        # Process the div attributes first
        BBB = bookName = None
        for attrib, value in book.items():
            if attrib == "n":
                bookName = value
            else:
                logging.warning(
                    "Unprocessed {!r} attribute ({}) in book element".format(
                        attrib, value))

        if bookName:
            BBB = self.genericBOS.getBBBFromText(bookName)
        if BBB is None:
            adjustedBookName = BibleOrgSysGlobals.removeAccents(bookName)
            if adjustedBookName != bookName:
                BBB = self.genericBOS.getBBBFromText(adjustedBookName)
        BBB2 = BibleOrgSysGlobals.loadedBibleBooksCodes.getBBBFromReferenceNumber(
            bookNumber)
        if BBB2 != BBB:  # Just double check using the book number
            if BibleOrgSysGlobals.debugFlag or BibleOrgSysGlobals.verbosityLevel > 2:
                vPrint(
                    'Quiet', debuggingThisModule,
                    "Assuming that book {} {!r} is {} (not {})".format(
                        bookNumber, bookName, BBB2, BBB))
            BBB = BBB2
            #vPrint( 'Quiet', debuggingThisModule, BBB ); halt

        if BBB:
            vPrint('Info', debuggingThisModule,
                   _("Validating {} {}…").format(BBB, bookName))
            thisBook = BibleBook(self, BBB)
            thisBook.objectNameString = 'VerseView XML Bible Book object'
            thisBook.objectTypeString = 'VerseView'
            #thisBook.sourceFilepath = self.sourceFilepath
            for element in book:
                if element.tag == VerseViewXMLBible.chapterTag:
                    sublocation = "chapter in {}".format(BBB)
                    BibleOrgSysGlobals.checkXMLNoText(element, sublocation,
                                                      'j3jd')
                    BibleOrgSysGlobals.checkXMLNoTail(element, sublocation,
                                                      'al1d')
                    self.__validateAndExtractChapter(BBB, thisBook, element)
                else:
                    logging.error(
                        "vb26 Expected to find {!r} but got {!r}".format(
                            VerseViewXMLBible.chapterTag, element.tag))
            vPrint('Info', debuggingThisModule,
                   "  Saving {} into results…".format(BBB))
            self.stashBook(thisBook)

    # end of VerseViewXMLBible.__validateAndExtractBook

    def __validateAndExtractChapter(self, BBB: str, thisBook, chapter):
        """
        Check/validate and extract chapter data from the given XML book record
            finding and saving chapter numbers and
            finding and saving verse elements.
        """

        if BibleOrgSysGlobals.debugFlag and debuggingThisModule and BibleOrgSysGlobals.verbosityLevel > 3:
            vPrint('Quiet', debuggingThisModule, _("Validating XML chapter…"))

        # Process the chapter attributes first
        chapterNumber = numVerses = None
        for attrib, value in chapter.items():
            if attrib == "n":
                chapterNumber = value
            else:
                logging.warning(
                    "Unprocessed {!r} attribute ({}) in chapter element".
                    format(attrib, value))
        if chapterNumber:
            #vPrint( 'Quiet', debuggingThisModule, BBB, 'c', chapterNumber )
            thisBook.addLine('c', chapterNumber)
        else:
            logging.error(
                "Missing 'n' attribute in chapter element for {}".format(BBB))

        for element in chapter:
            if element.tag == VerseViewXMLBible.verseTag:
                location = "verse in {} {}".format(BBB, chapterNumber)
                self.__validateAndExtractVerse(BBB, chapterNumber, thisBook,
                                               element)
            else:
                logging.error("sv34 Expected to find {!r} but got {!r}".format(
                    VerseViewXMLBible.verseTag, element.tag))

    # end of VerseViewXMLBible.__validateAndExtractChapter

    def __validateAndExtractVerse(self, BBB: str, chapterNumber, thisBook,
                                  verse):
        """
        Check/validate and extract verse data from the given XML book record
            finding and saving verse elements.
        """

        if BibleOrgSysGlobals.debugFlag and debuggingThisModule and BibleOrgSysGlobals.verbosityLevel > 3:
            vPrint('Quiet', debuggingThisModule, _("Validating XML verse…"))

        location = "verse in {} {}".format(BBB, chapterNumber)
        BibleOrgSysGlobals.checkXMLNoSubelements(verse, location, 'sg20')
        BibleOrgSysGlobals.checkXMLNoTail(verse, location, 'l5ks')

        # Handle verse attributes
        verseNumber = toVerseNumber = None
        for attrib, value in verse.items():
            if attrib == "n":
                verseNumber = value
            else:
                logging.warning(
                    "Unprocessed {!r} attribute ({}) in verse element".format(
                        attrib, value))
        if BibleOrgSysGlobals.debugFlag: assert verseNumber
        location = "{}:{}".format(
            location, verseNumber)  # Get a better location description
        #thisBook.addLine( 'v', verseNumber )
        vText = '' if verse.text is None else verse.text
        if vText: vText = vText.strip()
        #if not vText: # This happens if a verse starts immediately with a style or note
        #logging.warning( "{} {}:{} has no text".format( BBB, chapterNumber, verseNumber ) )

        ## Handle verse subelements (notes and styled portions)
        #for subelement in verse:
        #if subelement.tag == VerseViewXMLBible.noteTag:
        #sublocation = "note in " + location
        #noteType = None
        #for attrib,value in subelement.items():
        #if attrib=="type": noteType = value
        #else: logging.warning( "Unprocessed {!r} attribute ({}) in style subelement".format( attrib, value ) )
        #if noteType and noteType not in ('variant',):
        #logging.warning( "Unexpected {} note type in {}".format( noteType, BBB ) )
        #nText, nTail = subelement.text, subelement.tail
        ##vPrint( 'Quiet', debuggingThisModule, "note", BBB, chapterNumber, verseNumber, noteType, repr(nText), repr(nTail) )
        #vText += "\\f + \\fk {} \\ft {}\\f*".format( noteType, nText ) if noteType else "\\f + \\ft {}\\f*".format( nText )
        #if nTail:
        #if '\n' in nTail:
        #vPrint( 'Quiet', debuggingThisModule, "VerseViewXMLBible.__validateAndExtractVerse: nTail {} {}:{} {!r}".format( BBB, chapterNumber, verseNumber, nTail ) )
        #nTail = nTail.replace( '\n', ' ' )
        #vText += nTail
        #for sub2element in subelement:
        #if sub2element.tag == VerseViewXMLBible.styleTag:
        #sub2location = "style in " + sublocation
        #BibleOrgSysGlobals.checkXMLNoSubelements( sub2element, sub2location, 'fyt4' )
        #fs = css = idStyle = None
        #for attrib,value in sub2element.items():
        #if attrib=='fs': fs = value
        ##elif attrib=="css": css = value
        ##elif attrib=="id": idStyle = value
        #else: logging.warning( "Unprocessed {!r} attribute ({}) in style sub2element".format( attrib, value ) )
        #if BibleOrgSysGlobals.debugFlag: assert fs or css or idStyle
        #SFM = None
        #if fs == 'italic': SFM = '\\it'
        #elif fs == 'super': SFM = '\\bdit'
        #elif fs == 'emphasis': SFM = '\\em'
        #else: vPrint( 'Quiet', debuggingThisModule, "fs is", fs, "css is", css, "idStyle is", idStyle ); halt
        ##if css == "font-style:italic": SFM = '\\it'
        ##elif css == "font-style:italic;font-weight:bold": SFM = '\\bdit'
        ##elif css == "color:#FF0000": SFM = '\\em'
        ##elif css == "font-size: x-small; color:#8B8378": SFM = '\\add'
        ##elif css is None and idStyle=='cl:divineName': SFM = '\\nd'
        ##else: vPrint( 'Quiet', debuggingThisModule, "css is", css, "idStyle is", idStyle ); halt
        #sText, sTail = sub2element.text.strip(), sub2element.tail
        #if BibleOrgSysGlobals.debugFlag: assert sText
        #if SFM: vText += SFM+' ' + sText + SFM+'*'
        #else: vText += '\\sc ' + '['+css+']' + sText + '\\sc* ' # Use sc for unknown styles
        #if sTail: vText += sTail.strip()
        #else: logging.error( "df20 Expected to find {} but got {!r} in {}".format( VerseViewXMLBible.styleTag, sub2element.tag, sublocation ) )

        #elif subelement.tag == VerseViewXMLBible.styleTag:
        #sublocation = "style in " + location
        #BibleOrgSysGlobals.checkXMLNoSubelements( subelement, sublocation, 'f5gh' )
        #fs = css = idStyle = None
        #for attrib,value in subelement.items():
        #if attrib=="fs": fs = value
        ##elif attrib=="css": css = value
        ##elif attrib=="id": idStyle = value
        #else: logging.warning( "Unprocessed {!r} attribute ({}) in style subelement".format( attrib, value ) )
        #if BibleOrgSysGlobals.debugFlag: assert fs
        #SFM = None
        #if fs == 'super': SFM = '\\bdit'
        #elif fs == 'emphasis': SFM = '\\em'
        #else: vPrint( 'Quiet', debuggingThisModule, "fs is", fs, "css is", css, "idStyle is", idStyle ); halt
        ##if css == "font-style:italic": SFM = '\\it'
        ##elif css == "font-style:italic;font-weight:bold": SFM = '\\bdit'
        ##elif css == "color:#FF0000": SFM = '\\em'
        ##elif css == "font-size: x-small; color:#8B8378": SFM = '\\add'
        ##elif css is None and idStyle=='cl:divineName': SFM = '\\nd'
        ##else: vPrint( 'Quiet', debuggingThisModule, "css is", css, "idStyle is", idStyle ); halt
        #sText, sTail = subelement.text.strip(), subelement.tail
        #if BibleOrgSysGlobals.debugFlag: assert sText
        ##vPrint( 'Quiet', debuggingThisModule, BBB, chapterNumber, sublocation )
        #if SFM: vText += SFM+' ' + sText + SFM+'*'
        #else: vText += '\\sc ' + '['+css+']' + sText + '\\sc* ' # Use sc for unknown styles
        #if sTail: vText += sTail.strip()

        #elif subelement.tag == VerseViewXMLBible.breakTag:
        #sublocation = "line break in " + location
        #BibleOrgSysGlobals.checkXMLNoText( subelement, sublocation, 'c1d4' )
        #BibleOrgSysGlobals.checkXMLNoSubelements( subelement, sublocation, 'g4g8' )
        #art = None
        #for attrib,value in subelement.items():
        #if attrib=="art":
        #art = value
        #else: logging.warning( "Unprocessed {!r} attribute ({}) in style subelement".format( attrib, value ) )
        #if BibleOrgSysGlobals.debugFlag: assert art == 'x-nl'
        ##vPrint( 'Quiet', debuggingThisModule, BBB, chapterNumber, verseNumber )
        ##assert vText
        #if vText:
        #thisBook.addLine( 'v', verseNumber + ' ' + vText ); verseNumber = None
        #vText = ''
        #thisBook.addLine( 'm', subelement.tail.strip() if subelement.tail else '' )
        ##bTail = subelement.tail
        ##if bTail: vText = bTail.strip()
        #else: logging.error( "bd47 Expected to find NOTE or STYLE but got {!r} in {}".format( subelement.tag, location ) )

        if vText:  # This is the main text of the verse (follows the verse milestone)
            if '\n' in vText:
                vPrint(
                    'Quiet', debuggingThisModule,
                    "VerseViewXMLBible.__validateAndExtractVerse: vText {} {}:{} {!r}"
                    .format(BBB, chapterNumber, verseNumber, vText))
                vText = vText.replace('\n', ' ')
            thisBook.addLine('v', verseNumber + ' ' + vText)
            verseNumber = None
    def importDataToPython(self):
        """
        Loads (and pivots) the data (not including the header) into suitable Python containers to use in a Python program.
        (Of course, you can just use the elementTree in self._XMLTree if you prefer.)
        """
        def makeList(parameter1, parameter2):
            """
            Returns a list containing all parameters. Parameter1 may already be a list.
            """
            if isinstance(parameter1, list):
                #assert parameter2 not in parameter1
                parameter1.append(parameter2)
                return parameter1
            else:
                return [parameter1, parameter2]

        # end of makeList

        assert self._XMLTree
        if self.__DataList:  # We've already done an import/restructuring -- no need to repeat it
            return self.__DataList, self.__DataDict

        # We'll create a number of dictionaries with different elements as the key
        rawRefLinkList = []
        actualLinkCount = 0
        for element in self._XMLTree:
            #vPrint( 'Quiet', debuggingThisModule, BibleOrgSysGlobals.elementStr( element ) )

            # Get these first for helpful error messages
            sourceReference = element.find('sourceReference').text
            sourceComponent = element.find('sourceComponent').text
            assert sourceComponent in (
                'Section',
                'Verses',
                'Verse',
            )

            BibleOrgSysGlobals.checkXMLNoText(element, sourceReference, 'kls1')
            BibleOrgSysGlobals.checkXMLNoAttributes(element, sourceReference,
                                                    'kd21')
            BibleOrgSysGlobals.checkXMLNoTail(element, sourceReference, 'so20')

            actualRawLinksList = []
            for subelement in element:
                #vPrint( 'Quiet', debuggingThisModule, BibleOrgSysGlobals.elementStr( subelement ) )
                if subelement.tag in (
                        'sourceReference',
                        'sourceComponent',
                ):  # already processed these
                    BibleOrgSysGlobals.checkXMLNoAttributes(
                        subelement, sourceReference, 'ls12')
                    BibleOrgSysGlobals.checkXMLNoSubelements(
                        subelement, sourceReference, 'ks02')
                    BibleOrgSysGlobals.checkXMLNoTail(subelement,
                                                      sourceReference, 'sqw1')

                elif subelement.tag == 'BibleReferenceLink':
                    BibleOrgSysGlobals.checkXMLNoText(subelement,
                                                      sourceReference, 'haw9')
                    BibleOrgSysGlobals.checkXMLNoAttributes(
                        subelement, sourceReference, 'hs19')
                    BibleOrgSysGlobals.checkXMLNoTail(subelement,
                                                      sourceReference, 'jsd9')

                    targetReference = subelement.find('targetReference').text
                    targetComponent = subelement.find('targetComponent').text
                    assert targetComponent in (
                        'Section',
                        'Verses',
                        'Verse',
                    )
                    linkType = subelement.find('linkType').text
                    assert linkType in (
                        'TSK',
                        'QuotedOTReference',
                        'AlludedOTReference',
                        'PossibleOTReference',
                    )

                    actualRawLinksList.append((
                        targetReference,
                        targetComponent,
                        linkType,
                    ))
                    actualLinkCount += 1

            rawRefLinkList.append((
                sourceReference,
                sourceComponent,
                actualRawLinksList,
            ))

        vPrint(
            'Normal', debuggingThisModule,
            f"  {len(rawRefLinkList):,} raw links loaded (with {actualLinkCount:,} actual raw link entries)"
        )

        myRefLinkList = []
        actualLinkCount = 0
        BOS = BibleOrganisationalSystem('GENERIC-KJV-66-ENG')

        for j, (sourceReference, sourceComponent,
                actualRawLinksList) in enumerate(rawRefLinkList):
            # Just do some testing first
            if sourceComponent == 'Verse':
                x = SimpleVerseKey(sourceReference)
            else:
                flag = False
                try:
                    x = SimpleVerseKey(sourceReference, ignoreParseErrors=True)
                    flag = True
                except TypeError:
                    pass  # This should happen coz it should fail the SVK
                if flag:
                    logging.error("{} {!r} failed!".format(
                        sourceComponent, sourceReference))
                    raise TypeError
            # Now do the actual parsing
            parsedSourceReference = FlexibleVersesKey(sourceReference)
            if BibleOrgSysGlobals.debugFlag and debuggingThisModule:
                vPrint('Quiet', debuggingThisModule, j, sourceComponent,
                       sourceReference, parsedSourceReference)
                #assert parsedSourceReference.getShortText().replace(' ','_') == sourceReference
            actualLinksList = []
            for k, (targetReference, targetComponent,
                    linkType) in enumerate(actualRawLinksList):
                # Just do some testing first
                if targetComponent == 'Verse':
                    x = SimpleVerseKey(targetReference)
                else:
                    flag = False
                    try:
                        x = SimpleVerseKey(targetReference,
                                           ignoreParseErrors=True)
                        flag = True
                    except TypeError:
                        pass  # This should happen coz it should fail the SVK
                    if flag:
                        logging.error("{} {!r} failed!".format(
                            targetComponent, targetReference))
                        raise TypeError
                # Now do the actual parsing
                try:
                    parsedTargetReference = FlexibleVersesKey(targetReference)
                except TypeError:
                    logging.error(
                        "  Temporarily ignored {!r} (TypeError from FlexibleVersesKey)"
                        .format(targetReference))
                    parsedTargetReference = None
                if BibleOrgSysGlobals.debugFlag and debuggingThisModule:
                    vPrint('Quiet', debuggingThisModule, ' ', targetComponent,
                           targetReference, parsedTargetReference)
                    #assert parsedTargetReference.getShortText().replace(' ','_',1) == targetReference

                actualLinksList.append((
                    targetReference,
                    targetComponent,
                    parsedTargetReference,
                    linkType,
                ))
                actualLinkCount += 1

            myRefLinkList.append((
                sourceReference,
                sourceComponent,
                parsedSourceReference,
                actualLinksList,
            ))

        vPrint(
            'Normal', debuggingThisModule,
            "  {:,} links processed (with {:,} actual link entries)".format(
                len(rawRefLinkList), actualLinkCount))
        #vPrint( 'Quiet', debuggingThisModule, myRefLinkList ); halt
        self.__DataList = myRefLinkList

        # Now put it into my dictionaries for easy access
        # This part should be customized or added to for however you need to process the data

        # Create a link dictionary (by verse key)
        myRefLinkDict = {}
        for sourceReference, sourceComponent, parsedSourceReference, actualLinksList in myRefLinkList:
            #vPrint( 'Quiet', debuggingThisModule, sourceReference, sourceComponent, parsedSourceReference )
            #vPrint( 'Quiet', debuggingThisModule, sourceReference, sourceComponent, parsedSourceReference, actualLinksList )
            for verseRef in parsedSourceReference.getIncludedVerses():
                #vPrint( 'Quiet', debuggingThisModule, verseRef )
                assert isinstance(verseRef, SimpleVerseKey)
                if verseRef not in myRefLinkDict: myRefLinkDict[verseRef] = []
                myRefLinkDict[verseRef].append((
                    sourceReference,
                    sourceComponent,
                    parsedSourceReference,
                    actualLinksList,
                ))
            #vPrint( 'Quiet', debuggingThisModule, myRefLinkDict ); halt
        originalLinks = len(myRefLinkDict)
        vPrint(
            'Quiet', debuggingThisModule,
            "  {:,} verse links added to dictionary (includes filling out spans)"
            .format(originalLinks))
        #vPrint( 'Quiet', debuggingThisModule, myRefLinkDict ); halt

        # Create a reversed link dictionary (by verse key)
        for sourceReference, sourceComponent, parsedSourceReference, actualLinksList in myRefLinkList:
            #vPrint( 'Quiet', debuggingThisModule, sourceReference, sourceComponent, parsedSourceReference )
            #vPrint( 'Quiet', debuggingThisModule, sourceReference, sourceComponent, parsedSourceReference, actualLinksList )
            for targetReference, targetComponent, parsedTargetReference, linkType in actualLinksList:
                if parsedTargetReference is not None:
                    for verseRef in parsedTargetReference.getIncludedVerses():
                        #vPrint( 'Quiet', debuggingThisModule, verseRef )
                        assert isinstance(verseRef, SimpleVerseKey)
                        if linkType == 'TSK': reverseLinkType = 'TSKQuoted'
                        elif linkType == 'QuotedOTReference':
                            reverseLinkType = 'OTReferenceQuoted'
                        elif linkType == 'AlludedOTReference':
                            reverseLinkType = 'OTReferenceAlluded'
                        elif linkType == 'PossibleOTReference':
                            reverseLinkType = 'OTReferencePossible'
                        else:
                            halt  # Have a new linkType!
                        if verseRef not in myRefLinkDict:
                            myRefLinkDict[verseRef] = []
                        myRefLinkDict[verseRef].append(
                            (targetReference, targetComponent,
                             parsedTargetReference, [
                                 (sourceReference, sourceComponent,
                                  parsedSourceReference, reverseLinkType)
                             ]))
            #vPrint( 'Quiet', debuggingThisModule, myRefLinkDict ); halt
        totalLinks = len(myRefLinkDict)
        reverseLinks = totalLinks - originalLinks
        vPrint(
            'Quiet', debuggingThisModule,
            "  {:,} reverse links added to dictionary to give {:,} total".
            format(reverseLinks, totalLinks))
        #vPrint( 'Quiet', debuggingThisModule, myRefLinkDict ); halt

        self.__DataDict = myRefLinkDict

        # Let's find the most number of references for a verse
        mostReferences = totalReferences = 0
        for verseRef, entryList in self.__DataDict.items():
            numRefs = len(entryList)
            if numRefs > mostReferences:
                mostReferences, mostVerseRef = numRefs, verseRef
            totalReferences += numRefs
        vPrint(
            'Quiet', debuggingThisModule,
            "  {:,} maximum links for any one reference ({})".format(
                mostReferences, mostVerseRef.getShortText()))
        vPrint('Quiet', debuggingThisModule,
               "  {:,} total links for all references".format(totalReferences))

        return self.__DataList, self.__DataDict
Exemplo n.º 5
0
def createMySwordModule(self, outputFolder, controlDict):
    """
    Create a SQLite3 database module for the program MySword.

    self here is a Bible object with _processedLines
    """
    import tarfile
    from BibleOrgSys.Internals.InternalBibleInternals import BOS_ADDED_NESTING_MARKERS, BOS_NESTING_MARKERS
    from BibleOrgSys.Formats.theWordBible import theWordOTBookLines, theWordNTBookLines, theWordBookLines, theWordHandleIntroduction, theWordComposeVerseLine

    def writeMSBook(sqlObject, BBB: str, ourGlobals):
        """
        Writes a book to the MySword sqlObject file.
        """
        nonlocal lineCount
        bkData = self.books[BBB] if BBB in self.books else None
        #dPrint( 'Quiet', debuggingThisModule, bkData._processedLines )
        verseList = BOS.getNumVersesList(BBB)
        nBBB = BibleOrgSysGlobals.loadedBibleBooksCodes.getReferenceNumber(BBB)
        numC, numV = len(verseList), verseList[0]

        ourGlobals['line'], ourGlobals['lastLine'] = '', None
        ourGlobals['pi1'] = ourGlobals['pi2'] = ourGlobals['pi3'] = ourGlobals[
            'pi4'] = ourGlobals['pi5'] = ourGlobals['pi6'] = ourGlobals[
                'pi7'] = False
        if bkData:
            # Write book headings (stuff before chapter 1)
            ourGlobals['line'] = theWordHandleIntroduction(
                BBB, bkData, ourGlobals)

            # Write the verses
            C = V = 1
            ourGlobals['lastLine'] = ourGlobals['lastBCV'] = None
            while True:
                verseData = None
                if bkData:
                    try:
                        result = bkData.getContextVerseData((
                            BBB,
                            str(C),
                            str(V),
                        ))
                        verseData, context = result
                    except KeyError:  # Missing verses
                        logging.warning(
                            "BibleWriter.createMySwordModule: missing source verse at {} {}:{}"
                            .format(BBB, C, V))
                    # Handle some common versification anomalies
                    if (BBB, C, V) == ('JN3', 1,
                                       14):  # Add text for v15 if it exists
                        try:
                            result15 = bkData.getContextVerseData((
                                'JN3',
                                '1',
                                '15',
                            ))
                            verseData15, context15 = result15
                            verseData.extend(verseData15)
                        except KeyError:
                            pass  #  just ignore it
                    elif (BBB, C, V) == ('REV', 12,
                                         17):  # Add text for v15 if it exists
                        try:
                            result18 = bkData.getContextVerseData((
                                'REV',
                                '12',
                                '18',
                            ))
                            verseData18, context18 = result18
                            verseData.extend(verseData18)
                        except KeyError:
                            pass  #  just ignore it
                    composedLine = ''
                    if verseData:
                        composedLine = theWordComposeVerseLine(
                            BBB, C, V, verseData, ourGlobals)
                    # Stay one line behind (because paragraph indicators get appended to the previous line)
                    if ourGlobals['lastBCV'] is not None \
                    and ourGlobals['lastLine']: # don't bother writing blank (unfinished?) verses
                        sqlObject.execute( 'INSERT INTO "Bible" VALUES(?,?,?,?)', \
                            (ourGlobals['lastBCV'][0],ourGlobals['lastBCV'][1],ourGlobals['lastBCV'][2],ourGlobals['lastLine']) )
                        lineCount += 1
                    ourGlobals['lastLine'] = composedLine
                ourGlobals['lastBCV'] = (nBBB, C, V)
                V += 1
                if V > numV:
                    C += 1
                    if C > numC:
                        break
                    else:  # next chapter only
                        numV = verseList[C - 1]
                        V = 1
            #assert not ourGlobals['line'] and not ourGlobals['lastLine'] #  We should have written everything

        # Write the last line of the file
        if ourGlobals[
                'lastLine']:  # don't bother writing blank (unfinished?) verses
            sqlObject.execute( 'INSERT INTO "Bible" VALUES(?,?,?,?)', \
                (ourGlobals['lastBCV'][0],ourGlobals['lastBCV'][1],ourGlobals['lastBCV'][2],ourGlobals['lastLine']) )
            lineCount += 1

    # end of createMySwordModule.writeMSBook

    # Set-up their Bible reference system
    BOS = BibleOrganisationalSystem('GENERIC-KJV-66-ENG')
    #BRL = BibleReferenceList( BOS, BibleObject=None )

    # Try to figure out if it's an OT/NT or what (allow for up to 4 extra books like FRT,GLS, etc.)
    if len(self) <= (39 + 4) and self.containsAnyOT39Books(
    ) and not self.containsAnyNT27Books():
        testament, startBBB, endBBB = 'OT', 'GEN', 'MAL'
        booksExpected, textLineCountExpected, checkTotals = 39, 23145, theWordOTBookLines
    elif len(self) <= (27 + 4) and self.containsAnyNT27Books(
    ) and not self.containsAnyOT39Books():
        testament, startBBB, endBBB = 'NT', 'MAT', 'REV'
        booksExpected, textLineCountExpected, checkTotals = 27, 7957, theWordNTBookLines
    else:  # assume it's an entire Bible
        testament, startBBB, endBBB = 'BOTH', 'GEN', 'REV'
        booksExpected, textLineCountExpected, checkTotals = 66, 31102, theWordBookLines
    extension = '.bbl.mybible'

    vPrint('Info', debuggingThisModule, _("  Exporting to MySword format…"))
    mySettings = {}
    mySettings['unhandledMarkers'] = set()
    handledBooks = []

    if 'MySwordOutputFilename' in controlDict:
        filename = controlDict['MySwordOutputFilename']
    elif self.sourceFilename:
        filename = self.sourceFilename
    elif self.shortName:
        filename = self.shortName
    elif self.abbreviation:
        filename = self.abbreviation
    elif self.name:
        filename = self.name
    else:
        filename = 'export'
    if not filename.endswith(extension):
        filename += extension  # Make sure that we have the right file extension
    filepath = os.path.join(outputFolder,
                            BibleOrgSysGlobals.makeSafeFilename(filename))
    if os.path.exists(filepath): os.remove(filepath)
    vPrint('Info', debuggingThisModule,
           '  createMySwordModule: ' + _("Writing {!r}…").format(filepath))
    conn = sqlite3.connect(filepath)
    cursor = conn.cursor()

    # First write the settings Details table
    exeStr = 'CREATE TABLE Details(Description NVARCHAR(255), Abbreviation NVARCHAR(50), Comments TEXT, Version TEXT, VersionDate DATETIME, PublishDate DATETIME, RightToLeft BOOL, OT BOOL, NT BOOL, Strong BOOL'  # incomplete
    customCSS = self.getSetting('CustomCSS')
    if customCSS: exeStr += ', CustomCSS TEXT'
    exeStr += ')'
    cursor.execute(exeStr)

    values = []

    description = self.getSetting('Description')
    if not description: description = self.getSetting('description')
    if not description: description = self.name
    values.append(description)

    if self.abbreviation: abbreviation = self.abbreviation
    else: abbreviation = self.getSetting('WorkAbbreviation')
    if not abbreviation: abbreviation = self.name[:3].upper()
    values.append(abbreviation)

    comments = self.getSetting('Comments')
    values.append(comments)

    version = self.getSetting('Version')
    values.append(version)

    versionDate = self.getSetting('VersionDate')
    values.append(versionDate)

    publishDate = self.getSetting('PublishDate')
    values.append(publishDate)

    rightToLeft = self.getSetting('RightToLeft')
    values.append(rightToLeft)

    values.append(True if testament == 'OT' or testament == 'BOTH' else False)
    values.append(True if testament == 'NT' or testament == 'BOTH' else False)

    Strong = self.getSetting('Strong')
    values.append(Strong if Strong else False)

    if customCSS: values.append(customCSS)

    exeStr = 'INSERT INTO "Details" VALUES(' + '?,' * (len(values) - 1) + '?)'
    #dPrint( 'Quiet', debuggingThisModule, exeStr, values )
    cursor.execute(exeStr, values)
    #if BibleOrgSysGlobals.debugFlag: cursor.execute( exeStr, values )
    #else: # Not debugging
    #try: cursor.execute( exeStr, values )
    #except sqlite3.InterfaceError:
    #logging.critical( "SQLite3 Interface error executing {} with {}".format( exeStr, values ) )

    # Now create and fill the Bible table
    cursor.execute(
        'CREATE TABLE Bible(Book INT, Chapter INT, Verse INT, Scripture TEXT, Primary Key(Book,Chapter,Verse))'
    )
    conn.commit()  # save (commit) the changes
    BBB, lineCount = startBBB, 0
    while True:  # Write each Bible book in the KJV order
        writeMSBook(cursor, BBB, mySettings)
        conn.commit()  # save (commit) the changes
        handledBooks.append(BBB)
        if BBB == endBBB: break
        BBB = BOS.getNextBookCode(BBB)
    conn.commit()  # save (commit) the changes
    cursor.close()

    if mySettings['unhandledMarkers']:
        logging.warning(
            "BibleWriter.createMySwordModule: Unhandled markers were {}".
            format(mySettings['unhandledMarkers']))
        vPrint(
            'Normal', debuggingThisModule, "  " +
            _("WARNING: Unhandled createMySwordModule markers were {}").format(
                mySettings['unhandledMarkers']))
    unhandledBooks = []
    for BBB in self.getBookList():
        if BBB not in handledBooks: unhandledBooks.append(BBB)
    if unhandledBooks:
        logging.warning("createMySwordModule: Unhandled books were {}".format(
            unhandledBooks))
        vPrint(
            'Normal', debuggingThisModule, "  " +
            _("WARNING: Unhandled createMySwordModule books were {}").format(
                unhandledBooks))

    # Now create the gzipped file
    vPrint('Info', debuggingThisModule,
           "  Compressing {} MySword file…".format(filename))
    tar = tarfile.open(filepath + '.gz', 'w:gz')
    tar.add(filepath)
    tar.close()

    if BibleOrgSysGlobals.verbosityLevel > 0 and BibleOrgSysGlobals.maxProcesses > 1:
        vPrint('Quiet', debuggingThisModule,
               "  BibleWriter.createMySwordModule finished successfully.")
    return True
Exemplo n.º 6
0
def main() -> None:
    """
    This is the main program for the app
        which just tries to open and load some kind of Bible file(s)
            from the inputFolder that you specified
        and then export a PhotoBible (in the default BOSOutputFiles folder).

    Note that the standard verbosityLevel is 2:
        -s (silent) is 0
        -q (quiet) is 1
        -i (information) is 3
        -v (verbose) is 4.
    """
    BibleOrgSysGlobals.introduceProgram(__name__, programNameVersion,
                                        LAST_MODIFIED_DATE)

    ourBibleOrganisationalSystem = BibleOrganisationalSystem(
        "GENERIC-KJV-66-ENG")
    ourVersificationSystem = ourBibleOrganisationalSystem.getVersificationSystemName(
    )
    ourBibleSingleReference = BibleSingleReference(
        ourBibleOrganisationalSystem)

    vPrint('Quiet', debuggingThisModule, _("Use QUIT or EXIT to finish."))

    while True:  # Loop until they stop it
        userInput = input('\n' + _(
            "Enter a verse number 1..31102 or a single Bible verse reference (or QUIT): "
        ))
        if userInput.lower() in (
                'exit',
                'quit',
                'q',
                'stop',
                'halt',
        ):
            break

        # See if it's an absolute verse number
        try:
            userInt = int(userInput)
        except ValueError:
            userInt = None
        if userInt:
            if 1 <= userInt <= 31102:
                BBB, C, V = ourBibleOrganisationalSystem.convertAbsoluteVerseNumber(
                    userInt)
                vPrint(
                    'Quiet', debuggingThisModule,
                    _("{} verse number {} is {} {}:{}").format(
                        ourVersificationSystem, userInt, BBB, C, V))
            else:
                vPrint('Quiet', debuggingThisModule,
                       _("Absolute verse numbers must be in range 1..31,102."))

        else:  # assume it's a Bible reference
            adjustedUserInput = userInput
            if ':' not in adjustedUserInput:
                for alternative in (
                        '.',
                        ',',
                        '-',
                ):  # Handle possible alternative C:V punctuations
                    if alternative in adjustedUserInput:
                        adjustedUserInput = adjustedUserInput.replace(
                            alternative, ':', 1)
                        break
            results = ourBibleSingleReference.parseReferenceString(
                adjustedUserInput)
            #dPrint( 'Quiet', debuggingThisModule, results )
            successFlag, haveWarnings, BBB, C, V, S = results
            if successFlag:
                vPrint(
                    'Quiet', debuggingThisModule,
                    _("{!r} converted to {} {}:{} in our internal system.").
                    format(userInput, BBB, C, V))
                absoluteVerseNumber = ourBibleOrganisationalSystem.getAbsoluteVerseNumber(
                    BBB, C, V)
                vPrint(
                    'Quiet', debuggingThisModule,
                    _("  {} {}:{} is verse number {:,} in the {} versification system."
                      ).format(BBB, C, V, absoluteVerseNumber,
                               ourVersificationSystem))
                if BibleOrgSysGlobals.debugFlag:
                    vPrint(
                        'Quiet', debuggingThisModule,
                        _("  {} {}:{} is verse number 0x{:04x} in the {} versification system."
                          ).format(BBB, C, V, absoluteVerseNumber,
                                   ourVersificationSystem))
            else:
                vPrint(
                    'Quiet', debuggingThisModule,
                    _("Unable to find a valid single verse reference in your input: {!r}"
                      ).format(userInput))
Exemplo n.º 7
0
class MySwordBible(Bible):
    """
    Class for reading, validating, and converting MySwordBible files.
    """
    def __init__(self, sourceFolder, givenFilename, encoding='utf-8') -> None:
        """
        Constructor: just sets up the Bible object.
        """
        # Setup and initialise the base class first
        Bible.__init__(self)
        self.objectNameString = 'MySword Bible object'
        self.objectTypeString = 'MySword'

        # Now we can set our object variables
        self.sourceFolder, self.sourceFilename, self.encoding = sourceFolder, givenFilename, encoding
        self.sourceFilepath = os.path.join(self.sourceFolder,
                                           self.sourceFilename)

        # Do a preliminary check on the readability of our file
        if not os.access(self.sourceFilepath, os.R_OK):
            logging.critical(
                _("MySwordBible: File {!r} is unreadable").format(
                    self.sourceFilepath))

        filenameBits = os.path.splitext(self.sourceFilename)
        self.name = filenameBits[0]
        self.fileExtension = filenameBits[1]

        #if self.fileExtension.upper().endswith('X'):
        #logging.warning( _("MySwordBible: File {!r} is encrypted").format( self.sourceFilepath ) )

    # end of MySwordBible.__init__

    def preload(self):
        """
        Load the metadata from the SQLite3 database.
        """
        fnPrint(debuggingThisModule, "preload()…")

        vPrint('Info', debuggingThisModule,
               _("Preloading {}…").format(self.sourceFilepath))

        fileExtensionUpper = self.fileExtension.upper()
        if fileExtensionUpper not in FILENAME_ENDINGS_TO_ACCEPT:
            logging.critical("{} doesn't appear to be a MySword file".format(
                self.sourceFilename))
        elif not self.sourceFilename.upper().endswith(
                BIBLE_FILENAME_ENDINGS_TO_ACCEPT[0]):
            logging.critical(
                "{} doesn't appear to be a MySword Bible file".format(
                    self.sourceFilename))

        connection = sqlite3.connect(self.sourceFilepath)
        connection.row_factory = sqlite3.Row  # Enable row names
        self.cursor = connection.cursor()

        # First get the settings
        if self.suppliedMetadata is None: self.suppliedMetadata = {}
        self.suppliedMetadata['MySword'] = {}
        self.cursor.execute('select * from Details')
        row = self.cursor.fetchone()
        for key in row.keys():
            self.suppliedMetadata['MySword'][key] = row[key]
        #dPrint( 'Quiet', debuggingThisModule, self.suppliedMetadata['MySword'] ); halt
        #if 'Description' in self.settingsDict and len(self.settingsDict['Description'])<40: self.name = self.settingsDict['Description']
        #if 'Abbreviation' in self.settingsDict: self.abbreviation = self.settingsDict['Abbreviation']
        if 'encryption' in self.suppliedMetadata['MySword']:
            logging.critical("{} is encrypted: level {}".format(
                self.sourceFilename,
                self.suppliedMetadata['MySword']['encryption']))

        self.BibleOrganisationalSystem = BibleOrganisationalSystem(
            'GENERIC-KJV-66-ENG')

        self.preloadDone = True

    # end of MySwordBible.preload

    def load(self):
        """
        Load all the books out of the SQLite3 database.
        """
        fnPrint(debuggingThisModule, "load()…")
        assert self.preloadDone

        vPrint('Info', debuggingThisModule,
               _("Loading {}…").format(self.sourceFilepath))

        if self.suppliedMetadata['MySword']['OT'] and self.suppliedMetadata[
                'MySword']['NT']:
            testament, BBB = 'BOTH', 'GEN'
            booksExpected, textLineCountExpected = 66, 31102
        elif self.suppliedMetadata['MySword']['OT']:
            testament, BBB = 'OT', 'GEN'
            booksExpected, textLineCountExpected = 39, 23145
        elif self.suppliedMetadata['MySword']['NT']:
            testament, BBB = 'NT', 'MAT'
            booksExpected, textLineCountExpected = 27, 7957

        # Create the first book
        thisBook = BibleBook(self, BBB)
        thisBook.objectNameString = 'MySword Bible Book object'
        thisBook.objectTypeString = 'MySword'

        verseList = self.BibleOrganisationalSystem.getNumVersesList(BBB)
        numC, numV = len(verseList), verseList[0]
        nBBB = BibleOrgSysGlobals.loadedBibleBooksCodes.getReferenceNumber(BBB)
        C = V = 1

        bookCount = 0
        ourGlobals = {}
        continued = ourGlobals['haveParagraph'] = False
        haveLines = False
        while True:
            self.cursor.execute(
                'select Scripture from Bible where Book=? and Chapter=? and Verse=?',
                (nBBB, C, V))
            try:
                row = self.cursor.fetchone()
                line = row[0]
            except TypeError:  # This reference is missing (row is None)
                #dPrint( 'Quiet', debuggingThisModule, "something wrong at", BBB, C, V )
                #if BibleOrgSysGlobals.debugFlag: halt
                #dPrint( 'Quiet', debuggingThisModule, row )
                line = None
            #dPrint( 'Quiet', debuggingThisModule, nBBB, BBB, C, V, 'MySw file line is "' + line + '"' )
            if line is None:
                logging.warning(
                    "MySwordBible.load: Have missing verse line at {} {}:{}".
                    format(BBB, C, V))
            else:  # line is not None
                if not isinstance(line, str):
                    if 'encryption' in self.suppliedMetadata['MySword']:
                        logging.critical(
                            "MySwordBible.load: Unable to decrypt verse line at {} {}:{} {!r}"
                            .format(BBB, C, V, line))
                        break
                    else:
                        logging.critical(
                            "MySwordBible.load: Unable to decode verse line at {} {}:{} {!r} {}"
                            .format(BBB, C, V, line,
                                    self.suppliedMetadata['MySword']))
                elif not line:
                    logging.warning(
                        "MySwordBible.load: Found blank verse line at {} {}:{}"
                        .format(BBB, C, V))
                else:
                    haveLines = True

                    # Some modules end lines with \r\n or have it in the middle!
                    #   (We just ignore these for now)
                    while line and line[-1] in '\r\n':
                        line = line[:-1]
                    if '\r' in line or '\n' in line:  # (in the middle)
                        logging.warning(
                            "MySwordBible.load: Found CR or LF characters in verse line at {} {}:{}"
                            .format(BBB, C, V))
                    line = line.replace('\r\n',
                                        ' ').replace('\r',
                                                     ' ').replace('\n', ' ')

            #dPrint( 'Quiet', debuggingThisModule, "MySword.load", BBB, C, V, repr(line) )
            handleRTFLine(self.name, BBB, C, V, line, thisBook, ourGlobals)
            V += 1
            if V > numV:
                C += 1
                if C > numC:  # Save this book now
                    if haveLines:
                        vPrint('Verbose', debuggingThisModule,
                               "  MySword saving", BBB, bookCount + 1)
                        self.stashBook(thisBook)
                    #else: vPrint( 'Quiet', debuggingThisModule, "Not saving", BBB )
                    bookCount += 1  # Not the number saved but the number we attempted to process
                    if bookCount >= booksExpected: break
                    BBB = self.BibleOrganisationalSystem.getNextBookCode(BBB)
                    # Create the next book
                    thisBook = BibleBook(self, BBB)
                    thisBook.objectNameString = 'MySword Bible Book object'
                    thisBook.objectTypeString = 'MySword'
                    haveLines = False

                    verseList = self.BibleOrganisationalSystem.getNumVersesList(
                        BBB)
                    numC, numV = len(verseList), verseList[0]
                    nBBB = BibleOrgSysGlobals.loadedBibleBooksCodes.getReferenceNumber(
                        BBB)
                    C = V = 1
                    #thisBook.addLine( 'c', str(C) )
                else:  # next chapter only
                    #thisBook.addLine( 'c', str(C) )
                    numV = verseList[C - 1]
                    V = 1

            if ourGlobals['haveParagraph']:
                thisBook.addLine('p', '')
                ourGlobals['haveParagraph'] = False

        self.cursor.close()
        del self.cursor
        self.applySuppliedMetadata('MySword')  # Copy some to self.settingsDict
        self.doPostLoadProcessing()

    # end of MySwordBible.load

    def loadBook(self, BBB: str):
        """
        Load the requested book out of the SQLite3 database.
        """
        fnPrint(debuggingThisModule, "loadBook( {} )".format(BBB))
        assert self.preloadDone

        if BBB in self.books:
            dPrint('Quiet', debuggingThisModule,
                   "  {} is already loaded -- returning".format(BBB))
            return  # Already loaded
        if BBB in self.triedLoadingBook:
            logging.warning(
                "We had already tried loading MySwordBible {} for {}".format(
                    BBB, self.name))
            return  # We've already attempted to load this book
        self.triedLoadingBook[BBB] = True
        self.bookNeedsReloading[BBB] = False
        if BibleOrgSysGlobals.verbosityLevel > 2 or BibleOrgSysGlobals.debugFlag:
            vPrint(
                'Quiet', debuggingThisModule,
                _("MySwordBible: Loading {} from {}…").format(
                    BBB, self.sourceFilepath))

        #if self.suppliedMetadata['MySword']['OT'] and self.suppliedMetadata['MySword']['NT']:
        #testament, BBB = 'BOTH', 'GEN'
        #booksExpected, textLineCountExpected = 1, 31102
        #elif self.suppliedMetadata['MySword']['OT']:
        #testament, BBB = 'OT', 'GEN'
        #booksExpected, textLineCountExpected = 1, 23145
        #elif self.suppliedMetadata['MySword']['NT']:
        #testament, BBB = 'NT', 'MAT'
        #booksExpected, textLineCountExpected = 1, 7957

        # Create the first book
        thisBook = BibleBook(self, BBB)
        thisBook.objectNameString = 'MySword Bible Book object'
        thisBook.objectTypeString = 'MySword'

        verseList = self.BibleOrganisationalSystem.getNumVersesList(BBB)
        numC, numV = len(verseList), verseList[0]
        nBBB = BibleOrgSysGlobals.loadedBibleBooksCodes.getReferenceNumber(BBB)
        C = V = 1

        #bookCount = 0
        ourGlobals = {}
        continued = ourGlobals['haveParagraph'] = False
        haveLines = False
        while True:
            self.cursor.execute(
                'select Scripture from Bible where Book=? and Chapter=? and Verse=?',
                (nBBB, C, V))
            try:
                row = self.cursor.fetchone()
                line = row[0]
            except TypeError:  # This reference is missing (row is None)
                #dPrint( 'Quiet', debuggingThisModule, "something wrong at", BBB, C, V )
                #if BibleOrgSysGlobals.debugFlag: halt
                #dPrint( 'Quiet', debuggingThisModule, row )
                line = None
            #dPrint( 'Quiet', debuggingThisModule, nBBB, BBB, C, V, 'MySw file line is "' + line + '"' )
            if line is None:
                logging.warning(
                    "MySwordBible.load: Have missing verse line at {} {}:{}".
                    format(BBB, C, V))
            else:  # line is not None
                if not isinstance(line, str):
                    if 'encryption' in self.suppliedMetadata['MySword']:
                        logging.critical(
                            "MySwordBible.load: Unable to decrypt verse line at {} {}:{} {!r}"
                            .format(BBB, C, V, line))
                        break
                    else:
                        logging.critical(
                            "MySwordBible.load: Unable to decode verse line at {} {}:{} {!r} {}"
                            .format(BBB, C, V, line,
                                    self.suppliedMetadata['MySword']))
                elif not line:
                    logging.warning(
                        "MySwordBible.load: Found blank verse line at {} {}:{}"
                        .format(BBB, C, V))
                else:
                    haveLines = True

                    # Some modules end lines with \r\n or have it in the middle!
                    #   (We just ignore these for now)
                    while line and line[-1] in '\r\n':
                        line = line[:-1]
                    if '\r' in line or '\n' in line:  # (in the middle)
                        logging.warning(
                            "MySwordBible.load: Found CR or LF characters in verse line at {} {}:{}"
                            .format(BBB, C, V))
                    line = line.replace('\r\n',
                                        ' ').replace('\r',
                                                     ' ').replace('\n', ' ')

            #dPrint( 'Quiet', debuggingThisModule, "MySword.load", BBB, C, V, repr(line) )
            handleRTFLine(self.name, BBB, C, V, line, thisBook, ourGlobals)
            V += 1
            if V > numV:
                C += 1
                if C <= numC:  # next chapter only
                    #thisBook.addLine( 'c', str(C) )
                    numV = verseList[C - 1]
                    V = 1
                else:  # Save this book now
                    if haveLines:
                        vPrint('Info', debuggingThisModule, "  MySword saving",
                               BBB)
                        self.stashBook(thisBook)
                    #else: vPrint( 'Quiet', debuggingThisModule, "Not saving", BBB )
                    break

            if ourGlobals['haveParagraph']:
                thisBook.addLine('p', '')
                ourGlobals['haveParagraph'] = False
    def load(self):
        """
        Load a single source file and load book elements.
        """
        vPrint('Info', debuggingThisModule,
               _("Loading {}…").format(self.sourceFilepath))

        global BOS66, BOS81, BOSx
        if BOS66 is None:
            BOS66 = BibleOrganisationalSystem('GENERIC-KJV-66-ENG')
        if BOS81 is None:
            BOS81 = BibleOrganisationalSystem('GENERIC-KJV-80-ENG')
        if BOSx is None: BOSx = BibleOrganisationalSystem('GENERIC-ENG')

        if self.suppliedMetadata is None: self.suppliedMetadata = {}

        lastLine, lineCount = '', 0
        bookCode = BBB = metadataName = None
        lastBookCode = lastChapterNumber = lastVerseNumber = -1
        lastVText = ''
        thisBook = None
        settingsDict = {}
        with open(self.sourceFilepath, encoding=self.encoding
                  ) as myFile:  # Automatically closes the file when done
            for line in myFile:
                lineCount += 1
                if line[-1] == '\n':
                    line = line[:-1]  # Removing trailing newline character
                if not line: continue  # Just discard blank lines
                if lineCount == 1:
                    if self.encoding.lower() == 'utf-8' and line[0] == chr(
                            65279):  #U+FEFF or \ufeff
                        logging.info(
                            "      ForgeForSwordSearcherBible.load: Detected Unicode Byte Order Marker (BOM)"
                        )
                        line = line[
                            1:]  # Remove the Unicode Byte Order Marker (BOM)
                    match = re.search('^; TITLE:\\s', line)
                    if match:
                        if BibleOrgSysGlobals.debugFlag:
                            vPrint(
                                'Quiet', debuggingThisModule,
                                "First line got type {!r} match from {!r}".
                                format(match.group(0), line))
                    else:
                        vPrint(
                            'Verbose', debuggingThisModule,
                            "ForgeForSwordSearcherBible.load: (unexpected) first line was {!r} in {}"
                            .format(firstLine, thisFilename))
                        if BibleOrgSysGlobals.debugFlag and debuggingThisModule:
                            halt
                        continue

                #vPrint( 'Quiet', debuggingThisModule, 'ForgeForSwordSearcher file line is "' + line + '"' )
                lastLine = line

                # Process header stuff
                if line.startswith('; TITLE:'):
                    string = line[8:].strip()
                    if string: settingsDict['TITLE'] = string
                    continue
                elif line.startswith('; ABBREVIATION:'):
                    string = line[15:].strip()
                    if string: settingsDict['ABBREVIATION'] = string
                    continue
                elif line.startswith('; HAS ITALICS'):
                    string = line[14:].strip()
                    if string: settingsDict['HAS_ITALICS'] = string
                    continue
                elif line.startswith('; HAS FOOTNOTES:'):
                    string = line[15:].strip()
                    if string: settingsDict['HAS_FOOTNOTES'] = string
                    continue
                elif line.startswith('; HAS FOOTNOTES'):
                    string = line[14:].strip()
                    if string: settingsDict['HAS_FOOTNOTES'] = string
                    continue
                elif line.startswith('; HAS REDLETTER'):
                    string = line[14:].strip()
                    if string: settingsDict['HAS_REDLETTER'] = string
                    continue
                elif line[0] == ';':
                    logging.warning(
                        "ForgeForSwordSearcherBible.load is skipping unknown header/comment line: {}"
                        .format(line))
                    continue  # Just discard comment lines

                # Process the main segment
                if line.startswith('$$ '):
                    if metadataName and metadataContents:
                        settingsDict[metadataName] = metadataContents
                        metadataName = None
                    pointer = line[3:]
                    #vPrint( 'Quiet', debuggingThisModule, "pointer", repr(pointer) )
                    if pointer and pointer[0] == '{' and pointer[-1] == '}':
                        metadataName = pointer[1:-1]
                        if metadataName:
                            #vPrint( 'Quiet', debuggingThisModule, "metadataName", repr(metadataName) )
                            metadataContents = ''
                    else:  # let's assume it's a BCV reference
                        pointer = pointer.replace( '1 K','1K' ).replace( '2 K','2K' ) \
                                        .replace( '1 Chr','1Chr' ).replace( '2 Chr','2Chr' ) \
                                        .replace( '1 Cor','1Cor' ).replace( '2 Cor','2Cor' ) \
                                        .replace( '1 Thess','1Thess' ).replace( '2 Thess','2Thess' ) \
                                        .replace( '1 Tim','1Tim' ).replace( '2 Tim','2Tim' ) \
                                        .replace( '1 Pet','1Pet' ).replace( '2 Pet','2Pet' ) \
                                        .replace( '1 J','1J' ).replace( '2 J','2J' ).replace( '3 J','3J' )
                        B_CV_Bits = pointer.split(' ', 1)
                        if len(B_CV_Bits) == 2 and ':' in B_CV_Bits[1]:
                            bookCode, CVString = B_CV_Bits
                            chapterNumberString, verseNumberString = CVString.split(
                                ':')
                            chapterNumber = int(chapterNumberString)
                            verseNumber = int(verseNumberString)
                            if bookCode != lastBookCode:  # We've started a new book
                                if bookCode in ('Ge', ): BBB = 'GEN'
                                elif bookCode in ('Le', ): BBB = 'LEV'
                                elif bookCode in ('La', ):
                                    BBB = 'LAM'
                                    ##elif bookCode in ('Es',): BBB = 'EST'
                                    ##elif bookCode in ('Pr',): BBB = 'PRO'
                                    #elif bookCode in ('So',): BBB = 'SNG'
                                    #elif bookCode in ('La',): BBB = 'LAM'
                                    #elif bookCode in ('Jude',): BBB = 'JDE'
                                else:
                                    #vPrint( 'Quiet', debuggingThisModule, "4BookCode =", repr(bookCode) )
                                    #BBB = BOS.getBBBFromText( bookCode )  # Try to guess
                                    BBB = BOS66.getBBBFromText(
                                        bookCode)  # Try to guess
                                    if not BBB:
                                        BBB = BOS81.getBBBFromText(
                                            bookCode)  # Try to guess
                                    if not BBB:
                                        BBB = BOSx.getBBBFromText(
                                            bookCode)  # Try to guess
                                    #vPrint( 'Quiet', debuggingThisModule, "4BBB =", repr(BBB) )
                        else:
                            vPrint('Quiet', debuggingThisModule,
                                   "Unexpected number of bits", self.givenName,
                                   BBB, bookCode, chapterNumberString,
                                   verseNumberString, len(bits), bits)
                    continue  # Just save the pointer information which refers to the text on the next line
                else:  # it's not a $$ line
                    text = line
                    #vPrint( 'Quiet', debuggingThisModule, "text", repr(text) )
                    if metadataName:
                        metadataContents += ('\n' if metadataContents else
                                             '') + text
                        continue
                    else:
                        vText = text
                        # Handle bits like (<scripref>Pr 2:7</scripref>)
                        vText = vText.replace('(<scripref>',
                                              '\\x - \\xt ').replace(
                                                  '</scripref>)', '\\x*')
                        vText = vText.replace('<scripref>',
                                              '\\x - \\xt ').replace(
                                                  '</scripref>', '\\x*')
                        #if '\\' in vText: vPrint( 'Quiet', debuggingThisModule, 'ForgeForSwordSearcher vText', repr(vText) )
                        #vPrint( 'Quiet', debuggingThisModule, BBB, chapterNumber, verseNumber, repr(vText) )
                        # Convert {stuff} to footnotes
                        match = re.search('\\{(.+?)\\}', vText)
                        while match:
                            footnoteText = '\\f + \\fr {}:{} \\ft {}\\f*'.format(
                                chapterNumber, verseNumber, match.group(1))
                            vText = vText[:match.start(
                            )] + footnoteText + vText[
                                match.end():]  # Replace this footnote
                            #vPrint( 'Quiet', debuggingThisModule, BBB, chapterNumber, verseNumber, repr(vText) )
                            match = re.search('\\{(.+?)\\}', vText)
                        # Convert [stuff] to added fields
                        match = re.search('\\[(.+?)\\]', vText)
                        while match:
                            addText = '\\add {}\\add*'.format(match.group(1))
                            vText = vText[:match.start()] + addText + vText[
                                match.end():]  # Replace this chunk
                            #vPrint( 'Quiet', debuggingThisModule, BBB, chapterNumber, verseNumber, repr(vText) )
                            match = re.search('\\[(.+?)\\]', vText)
                        # Convert +r/This text is red-letter-r/ to wj fields
                        match = re.search('\\+r/(.+?)-r/', vText)
                        while match:
                            addText = '\\wj {}\\wj*'.format(match.group(1))
                            vText = vText[:match.start()] + addText + vText[
                                match.end():]  # Replace this chunk
                            #vPrint( 'Quiet', debuggingThisModule, BBB, chapterNumber, verseNumber, repr(vText) )
                            match = re.search('\\+r/(.+?)-r/', vText)
                        # Final check for unexpected remaining formatting
                        for badChar in '{}[]/':
                            if badChar in vText:
                                logging.warning(
                                    "Found remaining braces,brackets or slashes in SwordSearcher Forge VPL {} {}:{} {!r}"
                                    .format(BBB, chapterNumberString,
                                            verseNumberString, vText))
                                break

                if bookCode:
                    if bookCode != lastBookCode:  # We've started a new book
                        if lastBookCode != -1:  # Better save the last book
                            self.stashBook(thisBook)
                        if BBB:
                            if BBB in self:
                                logging.critical(
                                    "Have duplicated {} book in {}".format(
                                        self.givenName, BBB))
                            if BibleOrgSysGlobals.debugFlag:
                                assert BBB not in self
                            thisBook = BibleBook(self, BBB)
                            thisBook.objectNameString = 'ForgeForSwordSearcher Bible Book object'
                            thisBook.objectTypeString = 'ForgeForSwordSearcher'
                            verseList = BOSx.getNumVersesList(BBB)
                            numChapters, numVerses = len(
                                verseList), verseList[0]
                            lastBookCode = bookCode
                            lastChapterNumber = lastVerseNumber = -1
                        else:
                            logging.critical(
                                "ForgeForSwordSearcherBible could not figure out {!r} book code"
                                .format(bookCode))
                            if BibleOrgSysGlobals.debugFlag: halt

                    if BBB:
                        if chapterNumber != lastChapterNumber:  # We've started a new chapter
                            if BibleOrgSysGlobals.debugFlag:
                                assert chapterNumber > lastChapterNumber or BBB == 'ESG'  # Esther Greek might be an exception
                            if chapterNumber == 0:
                                logging.info(
                                    "Have chapter zero in {} {} {} {}:{}".
                                    format(self.givenName, BBB, bookCode,
                                           chapterNumberString,
                                           verseNumberString))
                            elif chapterNumber > numChapters:
                                logging.error(
                                    "Have high chapter number in {} {} {} {}:{} (expected max of {})"
                                    .format(self.givenName, BBB, bookCode,
                                            chapterNumberString,
                                            verseNumberString, numChapters))
                            thisBook.addLine('c', chapterNumberString)
                            lastChapterNumber = chapterNumber
                            lastVerseNumber = -1

                        # Handle the verse info
                        if verseNumber == lastVerseNumber and vText == lastVText:
                            logging.warning(
                                _("Ignored duplicate verse line in {} {} {} {}:{}"
                                  ).format(self.givenName, BBB, bookCode,
                                           chapterNumberString,
                                           verseNumberString))
                            continue
                        if verseNumber < lastVerseNumber:
                            logging.warning(
                                _("Ignored receding verse number (from {} to {}) in {} {} {} {}:{}"
                                  ).format(lastVerseNumber, verseNumber,
                                           self.givenName, BBB, bookCode,
                                           chapterNumberString,
                                           verseNumberString))
                        elif verseNumber == lastVerseNumber:
                            if vText == lastVText:
                                logging.warning(
                                    _("Ignored duplicated {} verse in {} {} {} {}:{}"
                                      ).format(verseNumber, self.givenName,
                                               BBB, bookCode,
                                               chapterNumberString,
                                               verseNumberString))
                            else:
                                logging.warning(
                                    _("Ignored duplicated {} verse number in {} {} {} {}:{}"
                                      ).format(verseNumber, self.givenName,
                                               BBB, bookCode,
                                               chapterNumberString,
                                               verseNumberString))

                        # Check for paragraph markers
                        if vText and vText[0] == '¶':
                            thisBook.addLine('p', '')
                            vText = vText[1:].lstrip()

                        #vPrint( 'Quiet', debuggingThisModule, '{} {}:{} = {!r}'.format( BBB, chapterNumberString, verseNumberString, vText ) )
                        thisBook.addLine('v', verseNumberString + ' ' + vText)
                        lastVText = vText
                        lastVerseNumber = verseNumber

                else:  # No bookCode yet
                    logging.warning(
                        "ForgeForSwordSearcherBible.load is skipping unknown pre-book line: {}"
                        .format(line))

        # Save the final book
        if thisBook is not None: self.stashBook(thisBook)

        # Clean up
        if settingsDict:
            #vPrint( 'Quiet', debuggingThisModule, "ForgeForSwordSearcher settingsDict", settingsDict )
            if self.suppliedMetadata is None: self.suppliedMetadata = {}
            self.suppliedMetadata['Forge4SS'] = settingsDict
            self.applySuppliedMetadata(
                'Forge4SS')  # Copy some to self.settingsDict

        self.doPostLoadProcessing()
Exemplo n.º 9
0
    def preload(self):
        """
        Loads the Metadata file if it can be found.
        """
        fnPrint(debuggingThisModule,
                "preload() from {}".format(self.sourceFilepath))

        self.unzippedFolderpath = tempfile.mkdtemp(suffix='_GoBible',
                                                   prefix='BOS_')
        vPrint('Info', debuggingThisModule,
               "Extracting files into {}…".format(self.unzippedFolderpath))
        with zipfile.ZipFile(self.sourceFilepath) as myzip:
            # NOTE: Could be a security risk here
            myzip.extractall(self.unzippedFolderpath)

        # Do a preliminary check on the contents of our folder
        foundFiles, foundFolders = [], []
        for something in os.listdir(self.unzippedFolderpath):
            somepath = os.path.join(self.unzippedFolderpath, something)
            if os.path.isdir(somepath): foundFolders.append(something)
            elif os.path.isfile(somepath): foundFiles.append(something)
            else:
                logging.error(
                    "GoBible.preload: Not sure what {!r} is in {}!".format(
                        somepath, self.unzippedFolderpath))
        numVitalFolders = 0
        if foundFolders:
            unexpectedFolders = []
            for folderName in foundFolders:
                if folderName in BibleOrgSysGlobals.COMMONLY_IGNORED_FOLDERS:
                    continue
                if folderName in ('Bible Data', 'META-INF'):
                    numVitalFolders += 1
                    continue
                unexpectedFolders.append(folderName)
            if unexpectedFolders:
                logging.info(
                    _("GoBible.preload: Surprised to see subfolders in {!r}: {}"
                      ).format(self.unzippedFolderpath, unexpectedFolders))
        if not foundFiles:
            vPrint(
                'Quiet', debuggingThisModule,
                "GoBible.preload: Couldn't find any files in {!r}".format(
                    self.unzippedFolderpath))
            raise FileNotFoundError  # No use continuing
        if not numVitalFolders:
            vPrint(
                'Quiet', debuggingThisModule,
                "GoBible.preload: Couldn't find any vital folders in {!r}".
                format(self.unzippedFolderpath))
            raise FileNotFoundError  # No use continuing

        self.dataFolderpath = os.path.join(self.unzippedFolderpath,
                                           'Bible Data/')
        if not os.path.isdir(self.dataFolderpath):
            logging.critical(
                _("GoBible.preload: Unable to find folder: {}").format(
                    self.dataFolderpath))

        # Do a preliminary check on the contents of our subfolder
        #self.discoveredBookList = []
        foundFiles, foundFolders = [], []
        for something in os.listdir(self.dataFolderpath):
            somepath = os.path.join(self.dataFolderpath, something)
            if os.path.isdir(somepath): foundFolders.append(something)
            elif os.path.isfile(somepath): foundFiles.append(something)
            else:
                logging.error(
                    "GoBible.preload: Not sure what {!r} is in {}!".format(
                        somepath, self.dataFolderpath))
        numBookFolders = 0
        if foundFolders:
            unexpectedFolders = []
            for folderName in foundFolders:
                if folderName in BibleOrgSysGlobals.COMMONLY_IGNORED_FOLDERS:
                    continue
                folderNameLower = folderName.lower()
                if folderNameLower.endswith('sfm'):  # .sfm or .usfm
                    numBookFolders += 1
                    bookCode = folderName[:-4]
                    # Code below doesn't work -- foldernames vary
                    #BBB = BibleOrgSysGlobals.loadedBibleBooksCodes.getBBBFromOSISAbbreviation( bookCode )
                    #self.discoveredBookList.append( BBB )
                    continue
                unexpectedFolders.append(folderName)
            if unexpectedFolders:
                logging.info(
                    _("GoBible.preload: Surprised to see subfolders in {!r}: {}"
                      ).format(self.dataFolderpath, unexpectedFolders))
        if not foundFiles:
            vPrint(
                'Quiet', debuggingThisModule,
                "GoBible.preload: Couldn't find any files in {!r}".format(
                    self.dataFolderpath))
            raise FileNotFoundError  # No use continuing
        if not numBookFolders:
            vPrint(
                'Quiet', debuggingThisModule,
                "GoBible.preload: Couldn't find any book folders in {!r}".
                format(self.dataFolderpath))
            raise FileNotFoundError  # No use continuing
        #dPrint( 'Never', debuggingThisModule, "GoBible.preload: Discovered", self.discoveredBookList )

        def readInString(fileBytes, fileIndex):
            """
            Strings have a single byte length, then the UTF-8 characters, then a trailing null.
            """
            stringLength = fileBytes[fileIndex]
            fileIndex += 1
            result = ""
            while True:
                nextChar = fileBytes[fileIndex]
                fileIndex += 1
                if not nextChar: break  # found the trailing null
                result += chr(nextChar)
            assert len(result) == stringLength  # Read string correctly
            return result, stringLength + 2

        # Load the Index file
        with open(os.path.join(self.dataFolderpath, 'Index'),
                  'rb') as main_index_file:
            mainIndexContents = main_index_file.read()
        index = 0
        numBooks, = struct.unpack("<H", mainIndexContents[index:index + 2])
        index += 2
        vPrint('Never', debuggingThisModule, "numBooks", numBooks)

        self.bookNames, self.filenameBases, self.startChapters, self.numChaptersList, self.numVersesList = [], [], [], [], []
        for bookIndex in range(numBooks):
            #dPrint( 'Quiet', debuggingThisModule, "\nbookIndex", bookIndex )

            # Read in the name of the book
            bookName, consumedBytes = readInString(mainIndexContents, index)
            vPrint('Never', debuggingThisModule, "bookName", repr(bookName))
            self.bookNames.append(bookName)
            index += consumedBytes

            # Read in the short book name
            filenameBase, consumedBytes = readInString(mainIndexContents,
                                                       index)
            vPrint('Never', debuggingThisModule, "filenameBase",
                   repr(filenameBase))
            self.filenameBases.append(filenameBase)
            index += consumedBytes

            startChapter, = struct.unpack("<H",
                                          mainIndexContents[index:index + 2])
            index += 2
            vPrint('Never', debuggingThisModule, "startChapter", startChapter)
            self.startChapters.append(startChapter)

            # Read in the number of chapters in this book
            numChapters, = struct.unpack("<H",
                                         mainIndexContents[index:index + 2])
            index += 2
            vPrint('Never', debuggingThisModule, "numChapters", numChapters)
            self.numChaptersList.append(numChapters)

            # Read in the file number, verse offset, and number of verses for each chapter
            versesPerChapter = []
            previousFileNumber = 0
            verseDataOffset = 0
            for chapterIndex in range(numChapters):
                # Seems that each entry is six bytes
                vPrint('Never', debuggingThisModule, chapterIndex,
                       mainIndexContents[index:index + 6])
                if 1:
                    allVersesLength, = struct.unpack(
                        ">I", mainIndexContents[index:index + 4])
                    index += 4
                    numVerses = mainIndexContents[index]
                    index += 1
                    # Seems that file number for final chapter is always zero (or missing for the last book)!!!
                    try:
                        fileNumber = mainIndexContents[index]
                        index += 1
                    except IndexError:
                        fileNumber = 0  # Why??? (will be adjusted just below)

                    # Why do we need this ???
                    if fileNumber == 0 and previousFileNumber > 0:
                        vPrint(
                            'Never', debuggingThisModule,
                            "Don't know why but: Adjusting file number from 0 to",
                            previousFileNumber)
                        fileNumber = previousFileNumber

                    if fileNumber != previousFileNumber:
                        verseDataOffset = 0
                        previousFileNumber = fileNumber
                else:
                    fileNumber, = struct.unpack(
                        "<H", mainIndexContents[index:index + 2])
                    index += 2
                    allVersesLength, = struct.unpack(
                        "<I", mainIndexContents[index:index + 4])
                    index += 3
                    try:
                        numVerses = mainIndexContents[index]
                        index += 1
                    except struct.error:
                        numVerses = -1  # Why does it fail for the last chapter of Revelation???

                    if fileNumber != previousFileNumber:
                        verseDataOffset = 0
                        previousFileNumber = fileNumber

                versesPerChapter.append(
                    (numVerses, fileNumber, verseDataOffset, allVersesLength))
                if debuggingThisModule:
                    vPrint(
                        'Quiet', debuggingThisModule,
                        f"Book #{bookIndex+1}   chapter {chapterIndex+1:3}: {fileNumber}, {verseDataOffset:,}, {allVersesLength:,}, {numVerses}"
                    )

                verseDataOffset += allVersesLength
            self.numVersesList.append(versesPerChapter)
        assert index == len(mainIndexContents)

        self.BibleOrgSystem = BibleOrganisationalSystem('GENERIC-KJV-66')
        if numBooks == 66:
            self.bookList = [
                BibleOrgSysGlobals.loadedBibleBooksCodes.
                getBBBFromReferenceNumber(x) for x in range(1, 66 + 1)
            ]
        elif numBooks == 27:
            self.bookList = [
                BibleOrgSysGlobals.loadedBibleBooksCodes.
                getBBBFromReferenceNumber(x) for x in range(40, 66 + 1)
            ]
        else:
            logging.warning(
                f"GoBible.preload found {numBooks} books -- trying to figure out book codes"
            )
            self.bookList = []
            for n in range(numBooks):
                #dPrint( 'Quiet', debuggingThisModule, f"{n+1}/{numBooks}: Got '{self.bookNames[n]}' and '{self.filenameBases[n]}'" )
                BBB1 = BibleOrgSysGlobals.loadedBibleBooksCodes.getBBBFromText(
                    self.bookNames[n])
                BBB2 = BibleOrgSysGlobals.loadedBibleBooksCodes.getBBBFromText(
                    self.filenameBases[n])
                #dPrint( 'Quiet', debuggingThisModule, f"{n+1}/{numBooks}: Found {BBB1} and {BBB2}" )
                if BBB1 and (BBB2 == BBB1 or BBB2 is None): BBB = BBB1
                elif BBB2 and BBB1 is None: BBB = BBB2
                elif BBB1 and BBB2:
                    logging.error(
                        f"GoBible.preload choosing '{self.bookNames[n]}'->{BBB1} over '{self.filenameBases[n]}'->{BBB2}"
                    )
                    BBB = BBB1
                else:
                    BBB = BibleOrgSysGlobals.loadedBibleBooksCodes.getBBBFromReferenceNumber(
                        n)
                    logging.error(
                        f"GoBible.preload unable to discover book code from '{self.bookNames[n]}'->{BBB1} or '{self.filenameBases[n]}'->{BBB2}: assuming {BBB}"
                    )
                self.bookList.append(BBB)
        vPrint('Quiet', debuggingThisModule,
               "GoBible.preload: {} book details preloaded".format(numBooks))
        if len(self.bookList) != numBooks:
            logging.critical(
                f"GoBible.preload could only discover book codes for {len(self.bookList)}/{numBooks} books"
            )

        self.preloadDone = True
Exemplo n.º 10
0
    def load( self ):
        """
        Load a single source file and load book elements.
        """
        vPrint( 'Info', debuggingThisModule, _("Loading {}…").format( self.sourceFilepath ) )

        global BOS66, BOS81, BOSx
        if BOS66 is None: BOS66 = BibleOrganisationalSystem( 'GENERIC-KJV-66-ENG' )
        if BOS81 is None: BOS81 = BibleOrganisationalSystem( 'GENERIC-KJV-80-ENG' )
        if BOSx is None: BOSx = BibleOrganisationalSystem( 'GENERIC-ENG' )

        if self.suppliedMetadata is None: self.suppliedMetadata = {}

        lastLine, lineCount = '', 0
        vplType = bookCodeText = lastBookCodeText = BBB = lastBBB = metadataName = None
        lastChapterNumber = lastVerseNumber = -1
        lastVText = ''
        thisBook = None
        settingsDict = {}
        with open( self.sourceFilepath, encoding=self.encoding ) as myFile: # Automatically closes the file when done
            for line in myFile:
                lineCount += 1
                if line[-1]=='\n': line=line[:-1] # Removing trailing newline character
                if not line: continue # Just discard blank lines
                if lineCount==1:
                    if self.encoding.lower()=='utf-8' and line[0]==chr(65279): #U+FEFF or \ufeff
                        logging.info( "      VPLBible.load: Detected Unicode Byte Order Marker (BOM)" )
                        line = line[1:] # Remove the Unicode Byte Order Marker (BOM)
                    # Try to identify the VPL type
                    match = re.search( '^(\\w{2,5}?)\\s(\\d{1,3})[:\\.](\\d{1,3})\\s', line )
                    if match: vplType = 1
                    else:
                        match = re.search( '^(\\d{8})\\s', line )
                        if match: vplType = 2
                        else:
                            match = re.search( '^# language_name:\\s', line )
                            if match: vplType = 3
                            #else:
                                #match = re.search( '^; TITLE:\\s', line )
                                #if match: vplType = 4
                    if match:
                        if BibleOrgSysGlobals.debugFlag:
                            vPrint( 'Quiet', debuggingThisModule, "First line got type #{} {!r} match from {!r}".format( vplType, match.group(0), line ) )
                    else:
                        vPrint( 'Verbose', debuggingThisModule, "VPLBible.load: (unexpected) first line was {!r} in {}".format( line, self.sourceFilepath ) )
                        if BibleOrgSysGlobals.debugFlag and debuggingThisModule: halt
                        continue
                    #vPrint( 'Quiet', debuggingThisModule, 'vplType', vplType )

                #vPrint( 'Quiet', debuggingThisModule, 'VPL file line is "' + line + '"' )
                lastLine = line

                # Process header stuff
                if vplType == 3:
                    if   line.startswith( '# language_name:' ):
                        string = line[16:].strip()
                        if string and string != 'Not available': settingsDict['LanguageName'] = string
                        continue
                    elif line.startswith( '# closest ISO 639-3:' ):
                        string = line[20:].strip()
                        if string and string != 'Not available': settingsDict['ISOLanguageCode'] = string
                        continue
                    elif line.startswith( '# year_short:' ):
                        string = line[13:].strip()
                        if string and string != 'Not available': settingsDict['Year.short'] = string
                        continue
                    elif line.startswith( '# year_long:' ):
                        string = line[12:].strip()
                        if string and string != 'Not available': settingsDict['Year.long'] = string
                        continue
                    elif line.startswith( '# title:' ):
                        string = line[8:].strip()
                        if string and string != 'Not available': settingsDict['WorkTitle'] = string
                        continue
                    elif line.startswith( '# URL:' ):
                        string = line[6:].strip()
                        if string and string != 'Not available': settingsDict['URL'] = string
                        continue
                    elif line.startswith( '# copyright_short:' ):
                        string = line[18:].strip()
                        if string and string != 'Not available': settingsDict['Copyright.short'] = string
                        continue
                    elif line.startswith( '# copyright_long:' ):
                        string = line[17:].strip()
                        if string and string != 'Not available': settingsDict['Copyright.long'] = string
                        continue
                    elif line[0]=='#':
                        logging.warning( "VPLBible.load {} is skipping unknown line: {}".format( vplType, line ) )
                        continue # Just discard comment lines
                #elif vplType == 4:
                    #if line.startswith( '; TITLE:' ):
                        #string = line[8:].strip()
                        #if string: settingsDict['TITLE'] = string
                        #continue
                    #elif line.startswith( '; ABBREVIATION:' ):
                        #string = line[15:].strip()
                        #if string: settingsDict['ABBREVIATION'] = string
                        #continue
                    #elif line.startswith( '; HAS ITALICS:' ):
                        #string = line[15:].strip()
                        #if string: settingsDict['HAS_ITALICS'] = string
                        #continue
                    #elif line.startswith( '; HAS FOOTNOTES:' ):
                        #string = line[15:].strip()
                        #if string: settingsDict['HAS_FOOTNOTES'] = string
                        #continue
                    #elif line.startswith( '; HAS FOOTNOTES' ):
                        #string = line[14:].strip()
                        #if string: settingsDict['HAS_FOOTNOTES'] = string
                        #continue
                    #elif line.startswith( '; HAS REDLETTER:' ):
                        #string = line[15:].strip()
                        #if string: settingsDict['HAS_REDLETTER'] = string
                        #continue
                    #elif line[0]==';':
                        #logging.warning( "VPLBible.load{} is skipping unknown header/comment line: {}".format( vplType, line ) )
                        #continue # Just discard comment lines

                # Process the main segment
                if vplType == 1:
                    bits = line.split( ' ', 2 )
                    #vPrint( 'Quiet', debuggingThisModule, self.givenName, BBB, bits )
                    if len(bits) == 3 and ':' in bits[1]:
                        bookCodeText, CVString, vText = bits
                        chapterNumberString, verseNumberString = CVString.split( ':' )
                        #vPrint( 'Quiet', debuggingThisModule, "{} {} bc={!r} c={!r} v={!r} txt={!r}".format( self.givenName, BBB, bookCodeText, chapterNumberString, verseNumberString, vText ) )
                        if chapterNumberString == '': chapterNumberString = '1' # Handle a bug in some single chapter books in VPL
                    else: vPrint( 'Quiet', debuggingThisModule, "Unexpected number of bits", self.givenName, BBB, bookCodeText, chapterNumberString, verseNumberString, len(bits), bits )

                    if not bookCodeText and not chapterNumberString and not verseNumberString:
                        vPrint( 'Quiet', debuggingThisModule, "Skipping empty line in {} {} {} {}:{}".format( self.givenName, BBB, bookCodeText, chapterNumberString, verseNumberString ) )
                        continue
                    if BibleOrgSysGlobals.debugFlag: assert 2  <= len(bookCodeText) <= 4
                    if BibleOrgSysGlobals.debugFlag: assert chapterNumberString.isdigit()
                    if not verseNumberString.isdigit():
                        logging.error( "Invalid verse number field at {}/{} {}:{!r}".format( bookCodeText, BBB, chapterNumberString, verseNumberString ) )
                        if BibleOrgSysGlobals.debugFlag and debuggingThisModule: assert verseNumberString.isdigit()
                        continue
                    chapterNumber = int( chapterNumberString )
                    verseNumber = int( verseNumberString )

                    if bookCodeText != lastBookCodeText: # We've started a new book
                        lastBBB = BBB
                        #if bookCodeText in ('Ge',): BBB = 'GEN'
                        if bookCodeText == 'Le' and lastBBB == 'GEN': BBB = 'LEV'
                        elif bookCodeText in ('Jud',) and lastBBB == 'JOS': BBB = 'JDG'
                        #elif bookCodeText in ('Es',): BBB = 'EST'
                        #elif bookCodeText in ('Pr',): BBB = 'PRO'
                        #elif bookCodeText in ('So','SOL') and lastBBB == 'ECC': BBB = 'SNG'
                        #elif bookCodeText in ('La',) and lastBBB == 'JER': BBB = 'LAM'
                        #elif bookCodeText == 'PHI' and lastBBB == 'EPH': BBB = 'PHP'
                        #elif bookCodeText == 'PHI' and self.givenName == "bjp_vpl": BBB = 'PHP' # Hack for incomplete NT
                        #elif bookCodeText in ('Jude',): BBB = 'JDE'
                        #elif bookCodeText == 'PRA' and lastBBB == 'LJE': BBB = 'PAZ'
                        #elif bookCodeText == 'PRM' and lastBBB == 'GES': BBB = 'MAN'
                        else:
                            BBB = BOS66.getBBBFromText( bookCodeText )  # Try to guess
                            if not BBB: BBB = BOS81.getBBBFromText( bookCodeText )  # Try to guess
                            if not BBB: BBB = BOSx.getBBBFromText( bookCodeText )  # Try to guess
                            if not BBB: BBB = BibleOrgSysGlobals.loadedBibleBooksCodes.getBBBFromText( bookCodeText )  # Try to guess
                        if not BBB:
                            logging.critical( "VPL Bible: Unable to determine book code from text {!r} after {!r}={}".format( bookCodeText, lastBookCodeText, lastBBB ) )
                            halt

                    # Handle special formatting
                    #   [square-brackets] are for Italicized words
                    #   <angle-brackets> are for the Words of Christ in Red
                    #   «chevrons»  are for the Titles in the Book  of Psalms.
                    vText = vText.replace( '[', '\\add ' ).replace( ']', '\\add*' ) \
                        .replace( '<', '\\wj ' ).replace( '>', '\\wj*' )
                    if vText and vText[0]=='«':
                        #vPrint( 'Quiet', debuggingThisModule, "Oh!", BBB, chapterNumberString, verseNumberString, repr(vText) )
                        if BBB=='PSA' and verseNumberString=='1': # Psalm title
                            vBits = vText[1:].split( '»' )
                            #vPrint( 'Quiet', debuggingThisModule, "vBits", vBits )
                            thisBook.addLine( 'd', vBits[0] ) # Psalm title
                            vText = vBits[1].lstrip()

                    # Handle the verse info
                    #if verseNumber==lastVerseNumber and vText==lastVText:
                        #logging.warning( _("Ignored duplicate verse line in {} {} {} {}:{}").format( self.givenName, BBB, bookCodeText, chapterNumberString, verseNumberString ) )
                        #continue
                    if BBB=='PSA' and verseNumberString=='1' and vText.startswith('&lt;') and self.givenName=='basic_english':
                        # Move Psalm titles to verse zero
                        verseNumber = 0
                    #if verseNumber < lastVerseNumber:
                        #logging.warning( _("Ignored receding verse number (from {} to {}) in {} {} {} {}:{}").format( lastVerseNumber, verseNumber, self.givenName, BBB, bookCodeText, chapterNumberString, verseNumberString ) )
                    #elif verseNumber == lastVerseNumber:
                        #if vText == lastVText:
                            #logging.warning( _("Ignored duplicated {} verse in {} {} {} {}:{}").format( verseNumber, self.givenName, BBB, bookCodeText, chapterNumberString, verseNumberString ) )
                        #else:
                            #logging.warning( _("Ignored duplicated {} verse number in {} {} {} {}:{}").format( verseNumber, self.givenName, BBB, bookCodeText, chapterNumberString, verseNumberString ) )

                elif vplType in (2,3):
                    bits = line.split( '\t', 1 )
                    #vPrint( 'Quiet', debuggingThisModule, self.givenName, BBB, bits )
                    bookNumberString, chapterNumberString, verseNumberString = bits[0][:2], bits[0][2:5], bits[0][5:]
                    #vPrint( 'Quiet', debuggingThisModule, bookNumberString, chapterNumberString, verseNumberString )
                    chapterNumberString = chapterNumberString.lstrip( '0' ) # Remove leading zeroes
                    verseNumberString = verseNumberString.lstrip( '0' ) # Remove leading zeroes
                    bookCodeText, chapterNumber, verseNumber = int( bookNumberString), int(chapterNumberString), int(verseNumberString)
                    vText = bits[1].replace(' ,',',').replace(' .','.').replace(' ;',';').replace(' :',':') \
                                    .replace(' !','!').replace(' )',')').replace(' ]',']').replace(' ”','”') \
                                    .replace('“ ','“').replace('( ','(').replace('[ ','[') #.replace(' !','!')

                    if bookCodeText != lastBookCodeText: # We've started a new book
                        lastBBB = BBB
                        bnDict = { 67:'TOB', 68:'JDT', 69:'ESG', 70:'WIS', 71:'SIR', 72:'BAR', 73:'LJE', 74:'PAZ', 75:'SUS',
                                76:'BEL', 77:'MA1', 78:'MA2', 79:'MA3', 80:'MA4', 81:'ES1', 82:'ES2', 83:'MAN', 84:'PS2',
                                85:'PSS', 86:'ODE', }
                        if 1 <= bookCodeText <= 66: BBB = BibleOrgSysGlobals.loadedBibleBooksCodes.getBBBFromReferenceNumber( bookCodeText )
                        else: BBB = bnDict[bookCodeText]

                #elif vplType == 4:
                    #if line.startswith( '$$ ' ):
                        #if metadataName and metadataContents:
                            #settingsDict[metadataName] = metadataContents
                            #metadataName = None
                        #pointer = line[3:]
                        ##vPrint( 'Quiet', debuggingThisModule, "pointer", repr(pointer) )
                        #if pointer and pointer[0]=='{' and pointer[-1]=='}':
                            #metadataName = pointer[1:-1]
                            #if metadataName:
                                ##vPrint( 'Quiet', debuggingThisModule, "metadataName", repr(metadataName) )
                                #metadataContents = ''
                        #else: # let's assume it's a BCV reference
                            #pointer = pointer.replace( '1 K','1K' ).replace( '2 K','2K' ) \
                                            #.replace( '1 Chr','1Chr' ).replace( '2 Chr','2Chr' ) \
                                            #.replace( '1 Cor','1Cor' ).replace( '2 Cor','2Cor' ) \
                                            #.replace( '1 Thess','1Thess' ).replace( '2 Thess','2Thess' ) \
                                            #.replace( '1 Tim','1Tim' ).replace( '2 Tim','2Tim' ) \
                                            #.replace( '1 Pet','1Pet' ).replace( '2 Pet','2Pet' ) \
                                            #.replace( '1 J','1J' ).replace( '2 J','2J' ).replace( '3 J','3J' )
                            #B_CV_Bits = pointer.split( ' ', 1 )
                            #if len(B_CV_Bits) == 2 and ':' in B_CV_Bits[1]:
                                #bookCodeText, CVString = B_CV_Bits
                                #chapterNumberString, verseNumberString = CVString.split( ':' )
                                #chapterNumber = int( chapterNumberString )
                                #verseNumber = int( verseNumberString )
                                #if bookCodeText != lastBookCodeText: # We've started a new book
                                    #if bookCodeText in ('Ge',): BBB = 'GEN'
                                    #elif bookCodeText in ('Le',): BBB = 'LEV'
                                    #elif bookCodeText in ('La',): BBB = 'LAM'
                                    #else:
                                        ##vPrint( 'Quiet', debuggingThisModule, "4bookCodeText =", repr(bookCodeText) )
                                        ##BBB = BOS.getBBBFromText( bookCodeText )  # Try to guess
                                        #BBB = BOS66.getBBBFromText( bookCodeText )  # Try to guess
                                        #if not BBB: BBB = BOS81.getBBBFromText( bookCodeText )  # Try to guess
                                        #if not BBB: BBB = BOSx.getBBBFromText( bookCodeText )  # Try to guess
                                        ##vPrint( 'Quiet', debuggingThisModule, "4BBB =", repr(BBB) )
                            #else: vPrint( 'Quiet', debuggingThisModule, "Unexpected number of bits", self.givenName, BBB, bookCodeText, chapterNumberString, verseNumberString, len(bits), bits )
                        #continue # Just save the pointer information which refers to the text on the next line
                    #else: # it's not a $$ line
                        #text = line
                        ##vPrint( 'Quiet', debuggingThisModule, "text", repr(text) )
                        #if metadataName:
                            #metadataContents += ('\n' if metadataContents else '') + text
                            #continue
                        #else:
                            #vText = text
                            ## Handle bits like (<scripref>Pr 2:7</scripref>)
                            #vText = vText.replace( '(<scripref>', '\\x - \\xt ' ).replace( '</scripref>)', '\\x*' )
                            #vText = vText.replace( '<scripref>', '\\x - \\xt ' ).replace( '</scripref>', '\\x*' )
                            ##if '\\' in vText: vPrint( 'Quiet', debuggingThisModule, 'VPL vText', repr(vText) )
                            #if vplType == 4: # Forge for SwordSearcher
                                ##vPrint( 'Quiet', debuggingThisModule, BBB, chapterNumber, verseNumber, repr(vText) )
                                ## Convert {stuff} to footnotes
                                #match = re.search( '\\{(.+?)\\}', vText )
                                #while match:
                                    #footnoteText = '\\f + \\fr {}:{} \\ft {}\\f*'.format( chapterNumber, verseNumber, match.group(1) )
                                    #vText = vText[:match.start()] + footnoteText + vText[match.end():] # Replace this footnote
                                    ##vPrint( 'Quiet', debuggingThisModule, BBB, chapterNumber, verseNumber, repr(vText) )
                                    #match = re.search( '\\{(.+?)\\}', vText )
                                ## Convert [stuff] to added fields
                                #match = re.search( '\\[(.+?)\\]', vText )
                                #while match:
                                    #addText = '\\add {}\\add*'.format( match.group(1) )
                                    #vText = vText[:match.start()] + addText + vText[match.end():] # Replace this chunk
                                    ##vPrint( 'Quiet', debuggingThisModule, BBB, chapterNumber, verseNumber, repr(vText) )
                                    #match = re.search( '\\[(.+?)\\]', vText )
                                #for badChar in '{}[]':
                                    #if badChar in vText:
                                        #logging.warning( "Found remaining braces or brackets in SwordSearcher Forge VPL {} {}:{} {!r}".format( BBB, chapterNumberString, verseNumberString, vText ) )
                                        #break

                else:
                    logging.critical( 'Unknown VPL type {}'.format( vplType ) )
                    if BibleOrgSysGlobals.debugFlag and debuggingThisModule: halt

                if bookCodeText:
                    if bookCodeText != lastBookCodeText: # We've started a new book
                        if lastBookCodeText is not None: # Better save the last book
                            self.stashBook( thisBook )
                        if BBB:
                            if BBB in self:
                                logging.critical( "Have duplicated {} book in {}".format( self.givenName, BBB ) )
                            if BibleOrgSysGlobals.debugFlag: assert BBB not in self
                            thisBook = BibleBook( self, BBB )
                            thisBook.objectNameString = 'VPL Bible Book object'
                            thisBook.objectTypeString = 'VPL'
                            verseList = BOSx.getNumVersesList( BBB )
                            numChapters, numVerses = len(verseList), verseList[0]
                            lastBookCodeText = bookCodeText
                            lastChapterNumber = lastVerseNumber = -1
                        else:
                            logging.critical( "VPLBible{} could not figure out {!r} book code".format( vplType, bookCodeText ) )
                            if BibleOrgSysGlobals.debugFlag: halt

                    if BBB:
                        if chapterNumber != lastChapterNumber: # We've started a new chapter
                            if BibleOrgSysGlobals.debugFlag: assert chapterNumber > lastChapterNumber or BBB=='ESG' # Esther Greek might be an exception
                            if chapterNumber == 0:
                                logging.info( "Have chapter zero in {} {} {} {}:{}".format( self.givenName, BBB, bookCodeText, chapterNumberString, verseNumberString ) )
                            elif chapterNumber > numChapters:
                                logging.error( "Have high chapter number in {} {} {} {}:{} (expected max of {})".format( self.givenName, BBB, bookCodeText, chapterNumberString, verseNumberString, numChapters ) )
                            thisBook.addLine( 'c', chapterNumberString )
                            lastChapterNumber = chapterNumber
                            lastVerseNumber = -1

                        # Handle the verse info
                        if verseNumber==lastVerseNumber and vText==lastVText:
                            logging.warning( _("Ignored duplicate verse line in {} {} {} {}:{}").format( self.givenName, BBB, bookCodeText, chapterNumberString, verseNumberString ) )
                            continue
                        if verseNumber < lastVerseNumber:
                            logging.warning( _("Ignored receding verse number (from {} to {}) in {} {} {} {}:{}").format( lastVerseNumber, verseNumber, self.givenName, BBB, bookCodeText, chapterNumberString, verseNumberString ) )
                        elif verseNumber == lastVerseNumber:
                            if vText == lastVText:
                                logging.warning( _("Ignored duplicated {} verse in {} {} {} {}:{}").format( verseNumber, self.givenName, BBB, bookCodeText, chapterNumberString, verseNumberString ) )
                            else:
                                logging.warning( _("Ignored duplicated {} verse number in {} {} {} {}:{}").format( verseNumber, self.givenName, BBB, bookCodeText, chapterNumberString, verseNumberString ) )

                        # Check for paragraph markers
                        if vText and vText[0]=='¶':
                            thisBook.addLine( 'p', '' )
                            vText = vText[1:].lstrip()

                        #vPrint( 'Quiet', debuggingThisModule, '{} {}:{} = {!r}'.format( BBB, chapterNumberString, verseNumberString, vText ) )
                        thisBook.addLine( 'v', verseNumberString + ' ' + vText )
                        lastVText = vText
                        lastVerseNumber = verseNumber

                else: # No bookCodeText yet
                    logging.warning( "VPLBible.load{} is skipping unknown pre-book line: {}".format( vplType, line ) )

        # Save the final book
        if thisBook is not None: self.stashBook( thisBook )

        # Clean up
        if settingsDict:
            #vPrint( 'Quiet', debuggingThisModule, "VPL settingsDict", settingsDict )
            if self.suppliedMetadata is None: self.suppliedMetadata = {}
            self.suppliedMetadata['VPL'] = settingsDict
            self.applySuppliedMetadata( 'VPL' ) # Copy some to self.settingsDict

        self.doPostLoadProcessing()
Exemplo n.º 11
0
class OpenSongXMLBible(Bible):
    """
    Class for reading, validating, and converting OpenSong Bible XML.
    """
    treeTag = 'bible'
    bookTag = 'b'
    chapterTag = 'c'
    verseTag = 'v'

    def __init__(self, sourceFolder, givenName, encoding='utf-8') -> None:
        """
        Constructor: just sets up the XML Bible file converter object.
        """
        # Setup and initialise the base class first
        dPrint(
            'Quiet', debuggingThisModule,
            "OpenSongXMLBible( {}, {}, {} )".format(sourceFolder, givenName,
                                                    encoding))
        Bible.__init__(self)
        self.objectNameString = 'OpenSong XML Bible object'
        self.objectTypeString = 'OpenSong'

        # Now we can set our object variables
        self.sourceFolder, self.givenName, self.encoding = sourceFolder, givenName, encoding
        self.sourceFilepath = os.path.join(self.sourceFolder, self.givenName)

        self.XMLTree = None  # Will hold the XML data

        # Get the data tables that we need for proper checking
        #self.ISOLanguages = ISO_639_3_Languages().loadData()
        self.genericBOS = BibleOrganisationalSystem('GENERIC-KJV-66-ENG')

        # Do a preliminary check on the readability of our file
        if not os.access(self.sourceFilepath, os.R_OK):
            vPrint(
                'Quiet', debuggingThisModule,
                "OpenSongXMLBible: File {!r} is unreadable".format(
                    self.sourceFilepath))

        self.name = self.givenName
        #if self.name is None:
        #pass

    # end of OpenSongXMLBible.__init__

    def load(self):
        """
        Load a single source XML file and load book elements.
        """
        vPrint('Info', debuggingThisModule,
               _("Loading {}…").format(self.sourceFilepath))
        self.XMLTree = ElementTree().parse(self.sourceFilepath)
        if BibleOrgSysGlobals.debugFlag:
            assert self.XMLTree  # Fail here if we didn't load anything at all

        # Find the main (bible) container
        if self.XMLTree.tag == OpenSongXMLBible.treeTag:
            location = "XML file"
            BibleOrgSysGlobals.checkXMLNoText(self.XMLTree, location, '4f6h')
            BibleOrgSysGlobals.checkXMLNoTail(self.XMLTree, location, '1wk8')

            name = shortName = None
            for attrib, value in self.XMLTree.items():
                if attrib == "n":
                    name = value
                elif attrib == "sn":
                    shortName = value
                else:
                    logging.warning(
                        "Unprocessed {!r} attribute ({}) in main element".
                        format(attrib, value))

            # Find the submain (book) containers
            for element in self.XMLTree:
                if element.tag == OpenSongXMLBible.bookTag:
                    sublocation = "book in " + location
                    BibleOrgSysGlobals.checkXMLNoText(element, sublocation,
                                                      'g3g5')
                    BibleOrgSysGlobals.checkXMLNoTail(element, sublocation,
                                                      'd3f6')
                    self.__validateAndExtractBook(element)
                elif element.tag == 'OT':
                    pass
                elif element.tag == 'NT':
                    pass
                else:
                    logging.error("Expected to find {!r} but got {!r}".format(
                        OpenSongXMLBible.bookTag, element.tag))
        else:
            logging.error("Expected to load {!r} but got {!r}".format(
                OpenSongXMLBible.treeTag, self.XMLTree.tag))
        self.doPostLoadProcessing()

    # end of OpenSongXMLBible.load

    def __validateAndExtractBook(self, book):
        """
        Check/validate and extract book data from the given XML book record
            finding chapter subelements.
        """
        global BibleBooksNames

        vPrint('Verbose', debuggingThisModule,
               _("Validating OpenSong XML book…"))

        # Process the div attributes first
        BBB = bookName = None
        for attrib, value in book.items():
            if attrib == "n":
                bookName = value
            else:
                logging.warning(
                    "Unprocessed {!r} attribute ({}) in book element".format(
                        attrib, value))
        if bookName:
            BBB = self.genericBOS.getBBBFromText(
                bookName)  # Booknames are usually in English
            if not BBB:  # wasn't English
                if BibleBooksNames is None:
                    BibleBooksNames = BibleBooksNamesSystems().loadData()
                BBB = BibleBooksNames.getBBBFromText(
                    bookName)  # Try non-English booknames
                #dPrint( 'Quiet', debuggingThisModule, "bookName", bookName, BBB )
            if BBB:
                vPrint('Info', debuggingThisModule,
                       _("Validating {} {}…").format(BBB, bookName))
                thisBook = BibleBook(self, BBB)
                thisBook.objectNameString = 'OpenSong XML Bible Book object'
                thisBook.objectTypeString = 'OpenSong'
                #thisBook.sourceFilepath = self.sourceFilepath
                USFMAbbreviation = BibleOrgSysGlobals.loadedBibleBooksCodes.getUSFMAbbreviation(
                    BBB)
                if not USFMAbbreviation:
                    logging.critical(
                        f"Unable to find USFM abbreviation for '{BBB}'")
                    if BibleOrgSysGlobals.strictCheckingFlag: halt
                    USFMAbbreviation = 'XXA'
                thisBook.addLine(
                    'id', '{} imported by {}'.format(USFMAbbreviation.upper(),
                                                     programNameVersion))
                thisBook.addLine('h', bookName)
                thisBook.addLine('mt1', bookName)
                for element in book:
                    if element.tag == OpenSongXMLBible.chapterTag:
                        sublocation = "chapter in {}".format(BBB)
                        BibleOrgSysGlobals.checkXMLNoText(
                            element, sublocation, 'j3jd')
                        BibleOrgSysGlobals.checkXMLNoTail(
                            element, sublocation, 'al1d')
                        self.__validateAndExtractChapter(
                            BBB, thisBook, element)
                    else:
                        logging.error(
                            "Expected to find {!r} but got {!r}".format(
                                OpenSongXMLBible.chapterTag, element.tag))
                vPrint('Info', debuggingThisModule,
                       "  Saving {} into results…".format(BBB))
                self.stashBook(thisBook)
            else:
                logging.error(
                    _("OpenSong load doesn't recognize book name: {!r}").
                    format(bookName))  # no BBB
        else:
            logging.error(
                _("OpenSong load can't find a book name"))  # no bookName

    # end of OpenSongXMLBible.__validateAndExtractBook

    def __validateAndExtractChapter(self, BBB: str, thisBook, chapter):
        """
        Check/validate and extract chapter data from the given XML book record
            finding and saving chapter numbers and
            finding and saving verse elements.
        """

        vPrint('Verbose', debuggingThisModule, _("Validating XML chapter…"))

        # Process the div attributes first
        chapterNumber = numVerses = None
        for attrib, value in chapter.items():
            if attrib == "n":
                chapterNumber = value
            elif attrib == "VERSES":
                numVerses = value
            else:
                logging.warning(
                    "Unprocessed {!r} attribute ({}) in chapter element".
                    format(attrib, value))
        if chapterNumber:
            #dPrint( 'Quiet', debuggingThisModule, BBB, 'c', chapterNumber )
            chapterNumber = chapterNumber.replace(
                'of Solomon ', '')  # Fix a mistake in the Chinese_SU module
            thisBook.addLine('c', chapterNumber)
        else:
            logging.error(
                "Missing 'n' attribute in chapter element for {}".format(BBB))

        for element in chapter:
            if element.tag == OpenSongXMLBible.verseTag:
                sublocation = "verse in {} {}".format(BBB, chapterNumber)
                BibleOrgSysGlobals.checkXMLNoTail(element, sublocation, 'l5ks')
                verseNumber = toVerseNumber = None
                for attrib, value in element.items():
                    if attrib == "n":
                        verseNumber = value
                    elif attrib == "t":
                        toVerseNumber = value
                    else:
                        logging.warning(
                            "Unprocessed {!r} attribute ({}) in verse element".
                            format(attrib, value))
                if BibleOrgSysGlobals.debugFlag: assert verseNumber
                #thisBook.addLine( 'v', verseNumber )
                vText = element.text if element.text else ''
                for subelement in element:
                    sub2location = "{} in {}".format(subelement.tag,
                                                     sublocation)
                    BibleOrgSysGlobals.checkXMLNoAttributes(
                        subelement, sub2location, 'ks03')
                    BibleOrgSysGlobals.checkXMLNoSubelements(
                        subelement, sub2location, 'ks05')
                    if subelement.tag == 'i':
                        vText += '\\it {}\\it*{}'.format(
                            subelement.text, subelement.tail)
                    else:
                        logging.error(
                            "Expected to find 'i' but got {!r}".format(
                                subelement.tag))
                vText += element.tail if element.tail else ''
                if not vText:
                    logging.warning("{} {}:{} has no text".format(
                        BBB, chapterNumber, verseNumber))
                #dPrint( 'Quiet', debuggingThisModule, 'vText1', vText )
                if vText:  # This is the main text of the verse (follows the verse milestone)
                    #dPrint( 'Quiet', debuggingThisModule, "{} {}:{} {!r}".format( BBB, chapterNumber, verseNumber, vText ) )
                    if '\n' in vText:  # This is how they represent poety
                        #dPrint( 'Quiet', debuggingThisModule, "vText", repr(vText), repr(element.text) )
                        for j, textBit in enumerate(vText.split('\n')):
                            if j == 0:
                                thisBook.addLine('q1', '')
                                thisBook.addLine('v',
                                                 verseNumber + ' ' + textBit)
                            else:
                                thisBook.addLine('q1', textBit)
                    else:  # Just one verse line
                        thisBook.addLine('v', verseNumber + ' ' + vText)
                #dPrint( 'Quiet', debuggingThisModule, 'vText2', vText )
            else:
                logging.error("Expected to find {!r} but got {!r}".format(
                    OpenSongXMLBible.verseTag, element.tag))
Exemplo n.º 12
0
def createOpenSongXML(BibleObject,
                      outputFolder=None,
                      controlDict=None,
                      validationSchema=None):
    """
    Using settings from the given control file,
        converts the USFM information to a UTF-8 OpenSong XML file.

    This format is roughly documented at http://de.wikipedia.org/wiki/OpenSong_XML
        but more fields can be discovered by looking at downloaded files.
    """
    vPrint('Normal', debuggingThisModule, "Running createOpenSongXML…")
    if BibleOrgSysGlobals.debugFlag: assert BibleObject.books

    ignoredMarkers, unhandledMarkers, unhandledBooks = set(), set(), []

    def writeOpenSongBook(writerObject, BBB: str, bkData):
        """Writes a book to the OpenSong XML writerObject."""
        #dPrint( 'Quiet', debuggingThisModule, 'BIBLEBOOK', [('bnumber',BibleOrgSysGlobals.loadedBibleBooksCodes.getReferenceNumber(BBB)), ('bname',BibleOrgSysGlobals.loadedBibleBooksCodes.getEnglishName_NR(BBB)), ('bsname',BibleOrgSysGlobals.loadedBibleBooksCodes.getOSISAbbreviation(BBB))] )
        OSISAbbrev = BibleOrgSysGlobals.loadedBibleBooksCodes.getOSISAbbreviation(
            BBB)
        if not OSISAbbrev:
            logging.warning(
                "toOpenSong: Can't write {} OpenSong book because no OSIS code available"
                .format(BBB))
            unhandledBooks.append(BBB)
            return
        writerObject.writeLineOpen('b', ('n', bkData.getAssumedBookNames()[0]))
        haveOpenChapter, startedFlag, gotVP, accumulator = False, False, None, ""
        C, V = '-1', '-1'  # So first/id line starts at -1:0
        for processedBibleEntry in bkData._processedLines:  # Process internal Bible data lines
            marker, text, extras = processedBibleEntry.getMarker(
            ), processedBibleEntry.getCleanText(
            ), processedBibleEntry.getExtras()
            #dPrint( 'Quiet', debuggingThisModule, marker, repr(text) )
            #if text: assert text[0] != ' '
            if '¬' in marker or marker in BOS_ADDED_NESTING_MARKERS:
                continue  # Just ignore added markers -- not needed here
            if marker in USFM_PRECHAPTER_MARKERS:
                if debuggingThisModule or BibleOrgSysGlobals.debugFlag or BibleOrgSysGlobals.strictCheckingFlag:
                    assert C == '-1' or marker == 'rem' or marker.startswith(
                        'mte')
                V = str(int(V) + 1)

            if marker in OFTEN_IGNORED_USFM_HEADER_MARKERS or marker in (
                    'ie', ):  # Just ignore these lines
                ignoredMarkers.add(marker)
            elif marker == 'c':
                if accumulator:
                    writerObject.writeLineOpenClose('v', accumulator,
                                                    ('n', verseNumberString))
                    accumulator = ''
                if haveOpenChapter:
                    writerObject.writeLineClose('c')
                C, V = text, '0'
                writerObject.writeLineOpen('c', ('n', text))
                haveOpenChapter = True
            elif marker in (
                    'c#',
            ):  # These are the markers that we can safely ignore for this export
                ignoredMarkers.add(marker)
            elif marker == 'vp#':  # This precedes a v field and has the verse number to be printed
                gotVP = text  # Just remember it for now
            elif marker == 'v':
                V = text
                if gotVP:  # this is the verse number to be published
                    text = gotVP
                    gotVP = None
                startedFlag = True
                if accumulator:
                    writerObject.writeLineOpenClose('v', accumulator,
                                                    ('n', verseNumberString))
                    accumulator = ''
                #dPrint( 'Quiet', debuggingThisModule, "Text {!r}".format( text ) )
                if not text:
                    logging.warning("createOpenSongXML: Missing text for v")
                    continue
                verseNumberString = text.replace('<', '').replace(
                    '>', ''
                ).replace(
                    '"', ''
                )  # Used below but remove anything that'll cause a big XML problem later

            elif marker in ('mt1','mt2','mt3','mt4', 'mte1','mte2','mte3','mte4', 'ms1','ms2','ms3','ms4', ) \
            or marker in USFM_ALL_INTRODUCTION_MARKERS \
            or marker in ('s1','s2','s3','s4', 'r','sr','mr', 'd','sp','cd', 'cl','lit', ):
                ignoredMarkers.add(marker)
            elif marker in USFM_BIBLE_PARAGRAPH_MARKERS:
                if BibleOrgSysGlobals.debugFlag: assert not text and not extras
                ignoredMarkers.add(marker)
            elif marker in (
                    'b',
                    'nb',
                    'ib',
            ):
                if BibleOrgSysGlobals.debugFlag: assert not text and not extras
                ignoredMarkers.add(marker)
            elif marker in (
                    'v~',
                    'p~',
            ):
                if BibleOrgSysGlobals.debugFlag: assert text or extras
                if not text:  # this is an empty (untranslated) verse
                    text = '- - -'  # but we'll put in a filler
                if startedFlag:
                    accumulator += (' ' if accumulator else
                                    '') + BibleOrgSysGlobals.makeSafeXML(text)
            else:
                if text:
                    logging.warning(
                        "toOpenSong: lost text in {} field in {} {}:{} {!r}".
                        format(marker, BBB, C, V, text))
                    #if BibleOrgSysGlobals.debugFlag: halt
                if extras:
                    logging.warning(
                        "toOpenSong: lost extras in {} field in {} {}:{}".
                        format(marker, BBB, C, V))
                    #if BibleOrgSysGlobals.debugFlag: halt
                unhandledMarkers.add(marker)
            if extras and marker not in (
                    'v~',
                    'p~',
            ) and marker not in ignoredMarkers:
                logging.critical(
                    "toOpenSong: extras not handled for {} at {} {}:{}".format(
                        marker, BBB, C, V))
        if accumulator:
            writerObject.writeLineOpenClose('v', accumulator,
                                            ('n', verseNumberString))
        if haveOpenChapter:
            writerObject.writeLineClose('c')
        writerObject.writeLineClose('b')

    # end of createOpenSongXML.writeOpenSongBook

    # Set-up our Bible reference system
    if 'PublicationCode' not in controlDict or controlDict[
            'PublicationCode'] == 'GENERIC':
        BOS = BibleObject.genericBOS
        BRL = BibleObject.genericBRL
    else:
        BOS = BibleOrganisationalSystem(controlDict['PublicationCode'])
        BRL = BibleReferenceList(BOS, BibleObject=None)

    vPrint('Info', debuggingThisModule, _("  Exporting to OpenSong format…"))
    try:
        osOFn = controlDict['OpenSongOutputFilename']
    except KeyError:
        osOFn = 'Bible.osong'
    filename = BibleOrgSysGlobals.makeSafeFilename(osOFn)
    xw = MLWriter(filename, outputFolder)
    xw.setHumanReadable()
    xw.start()
    xw.writeLineOpen('Bible')
    for BBB, bookData in BibleObject.books.items():
        writeOpenSongBook(xw, BBB, bookData)
    xw.writeLineClose('Bible')
    xw.close()

    if ignoredMarkers:
        logging.info("createOpenSongXML: Ignored markers were {}".format(
            ignoredMarkers))
    vPrint(
        'Info', debuggingThisModule,
        "  " + _("WARNING: Ignored createOpenSongXML markers were {}").format(
            ignoredMarkers))
    if unhandledMarkers:
        logging.warning("createOpenSongXML: Unhandled markers were {}".format(
            unhandledMarkers))
        vPrint(
            'Normal', debuggingThisModule,
            "  " + _("WARNING: Unhandled toOpenSong markers were {}").format(
                unhandledMarkers))
    if unhandledBooks:
        logging.warning("createOpenSongXML: Unhandled books were {}".format(
            unhandledBooks))
        vPrint(
            'Normal', debuggingThisModule, "  " +
            _("WARNING: Unhandled createOpenSongXML books were {}").format(
                unhandledBooks))

    # Now create a zipped version
    filepath = os.path.join(outputFolder, filename)
    vPrint('Info', debuggingThisModule,
           "  Zipping {} OpenSong file…".format(filename))
    zf = zipfile.ZipFile(filepath + '.zip',
                         'w',
                         compression=zipfile.ZIP_DEFLATED)
    zf.write(filepath, filename)
    zf.close()

    if validationSchema: return xw.validate(validationSchema)
    if BibleOrgSysGlobals.verbosityLevel > 0 and BibleOrgSysGlobals.maxProcesses > 1:
        vPrint('Quiet', debuggingThisModule,
               "  createOpenSongXML finished successfully.")
    return True