示例#1
0
    def _validate(self):
        """
        Check/validate the loaded data.

        Walks every top-level element of self._XMLTree.  Elements whose tag
        equals self._mainElementTag are passed to the BibleOrgSysGlobals
        checkXMLNoText / checkXMLNoTail / checkXMLNoSubelements checkers and
        then have their attributes checked against self._compulsoryAttributes,
        self._optionalAttributes and self._uniqueAttributes.  Any other
        element is logged as unexpected.

        All findings are reported through the logging module; nothing is
        returned.

        Raises:
            AssertionError: if self._XMLTree is falsy.
        """
        assert self._XMLTree

        # One set of already-seen values per must-be-unique attribute
        #   (a set gives constant-time duplicate detection).
        # Only attributes are tracked here -- element uniqueness is not checked.
        seenValuesDict = {"Attribute_" + attributeName: set()
                          for attributeName in self._uniqueAttributes}

        for j, element in enumerate(self._XMLTree):
            if element.tag != self._mainElementTag:
                logging.warning("Unexpected element: {} in record {}".format(element.tag, j))
                continue

            BibleOrgSysGlobals.checkXMLNoText(element, element.tag)
            BibleOrgSysGlobals.checkXMLNoTail(element, element.tag)
            BibleOrgSysGlobals.checkXMLNoSubelements(element, element.tag)

            # Check compulsory attributes on this main element
            for attributeName in self._compulsoryAttributes:
                attributeValue = element.get(attributeName)
                if attributeValue is None:
                    logging.error("Compulsory {!r} attribute is missing from {} element in record {}".format(attributeName, element.tag, j))
                elif not attributeValue and attributeName != "type":
                    # Present but empty (a blank "type" attribute is tolerated).
                    # This is an elif so that a missing attribute isn't
                    #   reported a second time as being blank.
                    logging.warning("Compulsory {!r} attribute is blank on {} element in record {}".format(attributeName, element.tag, j))

            # Check optional attributes on this main element
            for attributeName in self._optionalAttributes:
                attributeValue = element.get(attributeName)
                if attributeValue is not None and not attributeValue:
                    logging.warning("Optional {!r} attribute is blank on {} element in record {}".format(attributeName, element.tag, j))

            # Check for unexpected additional attributes on this main element
            for attributeName, attributeValue in element.items():
                if attributeName not in self._compulsoryAttributes and attributeName not in self._optionalAttributes:
                    logging.warning("Additional {!r} attribute ({!r}) found on {} element in record {}".format(attributeName, attributeValue, element.tag, j))

            # Check the attributes that must contain unique information
            #   (in that particular field -- doesn't check across different attributes)
            for attributeName in self._uniqueAttributes:
                attributeValue = element.get(attributeName)
                # "reference_name" is exempted from the uniqueness check
                if attributeValue is None or attributeName == "reference_name":
                    continue
                seenValues = seenValuesDict["Attribute_" + attributeName]
                if attributeValue in seenValues:
                    logging.error("Found {!r} data repeated in {!r} field on {} element in record {}".format(attributeValue, attributeName, element.tag, j))
                seenValues.add(attributeValue)
示例#2
0
    def __validateAndExtractVerse(self, BBB: str, chapterNumber, thisBook,
                                  verse):
        """
        Check/validate and extract verse data from the given XML verse element,
            saving the verse text into thisBook.

        Parameters:
            BBB: book code (only used here in location/diagnostic strings).
            chapterNumber: chapter number (only used here in
                location/diagnostic strings).
            thisBook: book object that receives the verse through
                thisBook.addLine( 'v', "<verseNumber> <text>" ).
            verse: XML verse element.  Its "n" attribute supplies the verse
                number and its text supplies the verse text.

        The verse element is passed to BibleOrgSysGlobals.checkXMLNoSubelements
        and checkXMLNoTail.  Any attribute other than "n" is logged as
        unprocessed.  The verse text is stripped and any embedded newlines
        are replaced by spaces before it is saved.  A verse without
        (non-blank) text adds nothing to thisBook.

        Returns nothing.

        Raises:
            AssertionError: in debug mode, if no (non-empty) "n" attribute
                was found.
        """

        if BibleOrgSysGlobals.debugFlag and debuggingThisModule and BibleOrgSysGlobals.verbosityLevel > 3:
            vPrint('Quiet', debuggingThisModule, _("Validating XML verse…"))

        location = "verse in {} {}".format(BBB, chapterNumber)
        BibleOrgSysGlobals.checkXMLNoSubelements(verse, location, 'sg20')
        BibleOrgSysGlobals.checkXMLNoTail(verse, location, 'l5ks')

        # Handle verse attributes
        verseNumber = None
        for attrib, value in verse.items():
            if attrib == "n":
                verseNumber = value
            else:
                logging.warning(
                    "Unprocessed {!r} attribute ({}) in verse element".format(
                        attrib, value))
        if BibleOrgSysGlobals.debugFlag: assert verseNumber

        # This is the main text of the verse
        vText = (verse.text or '').strip()
        if not vText:
            return  # Nothing to save for this verse

        if verseNumber is None:
            # Without this guard the string concatenation below would raise
            #   an unhelpful TypeError (None + str) outside of debug mode.
            logging.error(
                "Missing 'n' attribute on {} -- verse text {!r} not saved".
                format(location, vText))
            return

        if '\n' in vText:
            vPrint(
                'Quiet', debuggingThisModule,
                "VerseViewXMLBible.__validateAndExtractVerse: vText {} {}:{} {!r}"
                .format(BBB, chapterNumber, verseNumber, vText))
            vText = vText.replace('\n', ' ')
        thisBook.addLine('v', verseNumber + ' ' + vText)
示例#3
0
    def load(self):
        """
        Load a single source XML file and load book elements.

        Parses self.sourceFilepath into self.XMLTree, then walks the children
        of the root element:
          * filename, revision, title, font, copyright and sizefactor elements
            are passed to the BibleOrgSysGlobals XML checkers; the text of
            revision, title, font and copyright is stored in
            self.suppliedMetadata['VerseView'] under the keys 'Revision',
            'Title', 'Font' and 'Copyright';
          * each book element is passed to self.__validateAndExtractBook()
            together with a running book number (starting at 1);
          * anything else is logged as an error.
        An unexpected root tag is logged as an error as well.

        Finishes by calling self.applySuppliedMetadata( 'VerseView' ) and
        self.doPostLoadProcessing().  Returns nothing.

        Raises:
            AssertionError: in debug mode, if the root element has no
                children or a sizefactor element's text is not '1'.
        """
        vPrint('Info', debuggingThisModule,
               _("Loading {}…").format(self.sourceFilepath))
        self.XMLTree = ElementTree().parse(self.sourceFilepath)
        if BibleOrgSysGlobals.debugFlag:
            # Fail here if we didn't load anything at all.
            # len() counts the root's children -- the same condition as the
            #   (deprecated) truth test on an Element.
            assert len(self.XMLTree)

        if self.suppliedMetadata is None: self.suppliedMetadata = {}
        self.suppliedMetadata['VerseView'] = {}

        # Simple header elements that all get the same three checks:
        #   tag -> (name used in location strings, metadata key or None if the text isn't stored)
        headerElements = {
            VerseViewXMLBible.filenameTag: ('filename', None),
            VerseViewXMLBible.revisionTag: ('revision', 'Revision'),
            VerseViewXMLBible.titleTag: ('title', 'Title'),
            VerseViewXMLBible.fontTag: ('font', 'Font'),
            VerseViewXMLBible.copyrightTag: ('copyright', 'Copyright'),
            VerseViewXMLBible.sizefactorTag: ('sizefactor', None),
        }

        # Find the main (bible) container
        if self.XMLTree.tag == VerseViewXMLBible.treeTag:
            location = "VerseView XML file"
            BibleOrgSysGlobals.checkXMLNoText(self.XMLTree, location, '4f6h')
            BibleOrgSysGlobals.checkXMLNoAttributes(self.XMLTree, location,
                                                    'js24')
            BibleOrgSysGlobals.checkXMLNoTail(self.XMLTree, location, '1wk8')

            # Find the submain (various info and then book) containers
            bookNumber = 0
            for element in self.XMLTree:
                if element.tag in headerElements:
                    elementName, metadataKey = headerElements[element.tag]
                    sublocation = elementName + " in " + location
                    BibleOrgSysGlobals.checkXMLNoAttributes(
                        element, sublocation, 'jk86')
                    BibleOrgSysGlobals.checkXMLNoSubelements(
                        element, sublocation, 'hjk7')
                    BibleOrgSysGlobals.checkXMLNoTail(element, sublocation,
                                                      'bh09')
                    if metadataKey is not None:
                        self.suppliedMetadata['VerseView'][
                            metadataKey] = element.text
                    elif element.tag == VerseViewXMLBible.sizefactorTag:
                        if BibleOrgSysGlobals.debugFlag: assert element.text == '1'
                elif element.tag == VerseViewXMLBible.bookTag:
                    sublocation = "book in " + location
                    BibleOrgSysGlobals.checkXMLNoText(element, sublocation,
                                                      'g3g5')
                    BibleOrgSysGlobals.checkXMLNoTail(element, sublocation,
                                                      'd3f6')
                    bookNumber += 1
                    self.__validateAndExtractBook(element, bookNumber)
                else:
                    logging.error(
                        "xk15 Expected to find {!r} but got {!r}".format(
                            VerseViewXMLBible.bookTag, element.tag))
        else:
            logging.error("Expected to load {!r} but got {!r}".format(
                VerseViewXMLBible.treeTag, self.XMLTree.tag))

        if BibleOrgSysGlobals.debugFlag or BibleOrgSysGlobals.verbosityLevel > 2:
            # These are all compulsory so they should all exist,
            #   but .get() is used so that a file lacking one of them (or with
            #   the wrong root element) doesn't raise KeyError in this
            #   diagnostic output.
            for metadataKey in ('Revision', 'Title', 'Font', 'Copyright'):
                vPrint(
                    'Quiet', debuggingThisModule, "{} is {!r}".format(
                        metadataKey,
                        self.suppliedMetadata['VerseView'].get(metadataKey)))

        self.applySuppliedMetadata(
            'VerseView')  # Copy some to self.settingsDict
        self.doPostLoadProcessing()
    def importDataToPython(self):
        """
        Loads (and pivots) the data (not including the header) into suitable Python containers to use in a Python program.
        (Of course, you can just use the elementTree in self._XMLTree if you prefer.)

        Returns:
            A 2-tuple (dataList, dataDict) where
              dataList is a list of
                  (sourceReference, sourceComponent, parsedSourceReference, actualLinksList)
                with one entry per element of self._XMLTree, and each
                actualLinksList is a list of
                  (targetReference, targetComponent, parsedTargetReference, linkType);
                parsedTargetReference is None if FlexibleVersesKey raised
                TypeError for that target.
              dataDict maps each verse key yielded by getIncludedVerses() to
                a list of entries of that same 4-tuple shape: the forward
                entries for source verses, plus one reversed entry
                (target first, with a single source link carrying the
                reversed link type) for every verse of every parsed target.

        The result is stored on the instance; if a previous call produced a
        non-empty list, that stored result is returned without reprocessing.

        Raises:
            AssertionError: if self._XMLTree is falsy, or a component or
                link type is not one of the expected values.
            TypeError: if SimpleVerseKey( reference, ignoreParseErrors=True )
                does not raise TypeError for a reference whose component is
                not 'Verse'.
        """
        def checkAgainstSimpleVerseKey(component, reference):
            """
            Sanity check of reference against SimpleVerseKey.

            For a 'Verse' component a SimpleVerseKey is simply constructed
            (any exception it raises propagates).  For any other component
            the construction is expected to raise TypeError; if it doesn't,
            an error is logged and TypeError is raised here.
            """
            if component == 'Verse':
                SimpleVerseKey(reference)
                return
            try:
                SimpleVerseKey(reference, ignoreParseErrors=True)
            except TypeError:
                return  # This should happen coz it should fail the SVK
            logging.error("{} {!r} failed!".format(component, reference))
            raise TypeError

        # end of checkAgainstSimpleVerseKey

        assert self._XMLTree
        if self.__DataList:  # We've already done an import/restructuring -- no need to repeat it
            return self.__DataList, self.__DataDict

        validComponents = ('Section', 'Verses', 'Verse')
        # Forward link type -> link type recorded on the reversed entry
        reverseLinkTypes = {
            'TSK': 'TSKQuoted',
            'QuotedOTReference': 'OTReferenceQuoted',
            'AlludedOTReference': 'OTReferenceAlluded',
            'PossibleOTReference': 'OTReferencePossible',
        }

        # Pass 1: pull the raw strings out of the XML tree
        rawRefLinkList = []
        actualLinkCount = 0
        for element in self._XMLTree:
            # Get these first for helpful error messages
            sourceReference = element.find('sourceReference').text
            sourceComponent = element.find('sourceComponent').text
            assert sourceComponent in validComponents

            BibleOrgSysGlobals.checkXMLNoText(element, sourceReference, 'kls1')
            BibleOrgSysGlobals.checkXMLNoAttributes(element, sourceReference,
                                                    'kd21')
            BibleOrgSysGlobals.checkXMLNoTail(element, sourceReference, 'so20')

            actualRawLinksList = []
            for subelement in element:
                if subelement.tag in ('sourceReference', 'sourceComponent'):
                    # Already processed the content of these above
                    BibleOrgSysGlobals.checkXMLNoAttributes(
                        subelement, sourceReference, 'ls12')
                    BibleOrgSysGlobals.checkXMLNoSubelements(
                        subelement, sourceReference, 'ks02')
                    BibleOrgSysGlobals.checkXMLNoTail(subelement,
                                                      sourceReference, 'sqw1')

                elif subelement.tag == 'BibleReferenceLink':
                    BibleOrgSysGlobals.checkXMLNoText(subelement,
                                                      sourceReference, 'haw9')
                    BibleOrgSysGlobals.checkXMLNoAttributes(
                        subelement, sourceReference, 'hs19')
                    BibleOrgSysGlobals.checkXMLNoTail(subelement,
                                                      sourceReference, 'jsd9')

                    targetReference = subelement.find('targetReference').text
                    targetComponent = subelement.find('targetComponent').text
                    assert targetComponent in validComponents
                    linkType = subelement.find('linkType').text
                    assert linkType in reverseLinkTypes

                    actualRawLinksList.append(
                        (targetReference, targetComponent, linkType))
                    actualLinkCount += 1
                # Subelements with any other tag are ignored

            rawRefLinkList.append(
                (sourceReference, sourceComponent, actualRawLinksList))

        vPrint(
            'Normal', debuggingThisModule,
            f"  {len(rawRefLinkList):,} raw links loaded (with {actualLinkCount:,} actual raw link entries)"
        )

        # Pass 2: parse the reference strings into verse key objects
        myRefLinkList = []
        actualLinkCount = 0
        # NOTE(review): BOS is not referenced again in this method -- kept in
        #   case constructing it has side effects; confirm and remove if not.
        BOS = BibleOrganisationalSystem('GENERIC-KJV-66-ENG')

        for j, (sourceReference, sourceComponent,
                actualRawLinksList) in enumerate(rawRefLinkList):
            # Just do some testing first
            checkAgainstSimpleVerseKey(sourceComponent, sourceReference)
            # Now do the actual parsing
            parsedSourceReference = FlexibleVersesKey(sourceReference)
            if BibleOrgSysGlobals.debugFlag and debuggingThisModule:
                vPrint('Quiet', debuggingThisModule, j, sourceComponent,
                       sourceReference, parsedSourceReference)

            actualLinksList = []
            for targetReference, targetComponent, linkType in actualRawLinksList:
                # Just do some testing first
                checkAgainstSimpleVerseKey(targetComponent, targetReference)
                # Now do the actual parsing
                try:
                    parsedTargetReference = FlexibleVersesKey(targetReference)
                except TypeError:
                    logging.error(
                        "  Temporarily ignored {!r} (TypeError from FlexibleVersesKey)"
                        .format(targetReference))
                    parsedTargetReference = None
                if BibleOrgSysGlobals.debugFlag and debuggingThisModule:
                    vPrint('Quiet', debuggingThisModule, ' ', targetComponent,
                           targetReference, parsedTargetReference)

                actualLinksList.append((targetReference, targetComponent,
                                        parsedTargetReference, linkType))
                actualLinkCount += 1

            myRefLinkList.append((sourceReference, sourceComponent,
                                  parsedSourceReference, actualLinksList))

        vPrint(
            'Normal', debuggingThisModule,
            "  {:,} links processed (with {:,} actual link entries)".format(
                len(rawRefLinkList), actualLinkCount))
        self.__DataList = myRefLinkList

        # Now put it into my dictionaries for easy access
        # This part should be customized or added to for however you need to process the data

        # Create a link dictionary (by verse key)
        myRefLinkDict = {}
        for sourceReference, sourceComponent, parsedSourceReference, actualLinksList in myRefLinkList:
            for verseRef in parsedSourceReference.getIncludedVerses():
                assert isinstance(verseRef, SimpleVerseKey)
                myRefLinkDict.setdefault(verseRef, []).append(
                    (sourceReference, sourceComponent, parsedSourceReference,
                     actualLinksList))
        originalLinks = len(myRefLinkDict)
        vPrint(
            'Quiet', debuggingThisModule,
            "  {:,} verse links added to dictionary (includes filling out spans)"
            .format(originalLinks))

        # Add the reversed links to the same dictionary (by verse key)
        for sourceReference, sourceComponent, parsedSourceReference, actualLinksList in myRefLinkList:
            for targetReference, targetComponent, parsedTargetReference, linkType in actualLinksList:
                if parsedTargetReference is None:
                    continue  # This target couldn't be parsed above
                for verseRef in parsedTargetReference.getIncludedVerses():
                    assert isinstance(verseRef, SimpleVerseKey)
                    if linkType not in reverseLinkTypes:
                        halt  # Have a new linkType!
                    reverseLinkType = reverseLinkTypes[linkType]
                    myRefLinkDict.setdefault(verseRef, []).append(
                        (targetReference, targetComponent,
                         parsedTargetReference, [
                             (sourceReference, sourceComponent,
                              parsedSourceReference, reverseLinkType)
                         ]))
        totalLinks = len(myRefLinkDict)
        # These two figures count dictionary keys (verse keys)
        reverseLinks = totalLinks - originalLinks
        vPrint(
            'Quiet', debuggingThisModule,
            "  {:,} reverse links added to dictionary to give {:,} total".
            format(reverseLinks, totalLinks))

        self.__DataDict = myRefLinkDict

        # Let's find the most number of references for a verse
        mostReferences = totalReferences = 0
        mostVerseRef = None  # Stays None only if the dictionary is empty
        for verseRef, entryList in self.__DataDict.items():
            numRefs = len(entryList)
            if numRefs > mostReferences:
                mostReferences, mostVerseRef = numRefs, verseRef
            totalReferences += numRefs
        if mostVerseRef is not None:
            vPrint(
                'Quiet', debuggingThisModule,
                "  {:,} maximum links for any one reference ({})".format(
                    mostReferences, mostVerseRef.getShortText()))
        vPrint('Quiet', debuggingThisModule,
               "  {:,} total links for all references".format(totalReferences))

        return self.__DataList, self.__DataDict
    def __validate(self):
        """
        Check/validate the loaded data.
        """
        assert self._XMLTree

        uniqueDict = {}
        for elementName in self._uniqueElements:
            uniqueDict["Element_" + elementName] = []
        for attributeName in self._uniqueAttributes:
            uniqueDict["Attribute_" + attributeName] = []

        expectedID = 1
        for j, element in enumerate(self._XMLTree):
            if element.tag == self._mainElementTag:
                BibleOrgSysGlobals.checkXMLNoText(element, element.tag)
                BibleOrgSysGlobals.checkXMLNoTail(element, element.tag)
                if not self._compulsoryAttributes and not self._optionalAttributes:
                    BibleOrgSysGlobals.checkXMLNoAttributes(
                        element, element.tag)
                if not self._compulsoryElements and not self._optionalElements:
                    BibleOrgSysGlobals.checkXMLNoSubelements(
                        element, element.tag)

                # Check compulsory attributes on this main element
                for attributeName in self._compulsoryAttributes:
                    attributeValue = element.get(attributeName)
                    if attributeValue is None:
                        logging.error(
                            _("Compulsory {!r} attribute is missing from {} element in record {}"
                              ).format(attributeName, element.tag, j))
                    if not attributeValue:
                        logging.warning(
                            _("Compulsory {!r} attribute is blank on {} element in record {}"
                              ).format(attributeName, element.tag, j))

                # Check optional attributes on this main element
                for attributeName in self._optionalAttributes:
                    attributeValue = element.get(attributeName)
                    if attributeValue is not None:
                        if not attributeValue:
                            logging.warning(
                                _("Optional {!r} attribute is blank on {} element in record {}"
                                  ).format(attributeName, element.tag, j))

                # Check for unexpected additional attributes on this main element
                for attributeName in element.keys():
                    attributeValue = element.get(attributeName)
                    if attributeName not in self._compulsoryAttributes and attributeName not in self._optionalAttributes:
                        logging.warning(
                            _("Additional {!r} attribute ({!r}) found on {} element in record {}"
                              ).format(attributeName, attributeValue,
                                       element.tag, j))

                # Check the attributes that must contain unique information (in that particular field -- doesn't check across different attributes)
                for attributeName in self._uniqueAttributes:
                    attributeValue = element.get(attributeName)
                    if attributeValue is not None:
                        if attributeValue in uniqueDict["Attribute_" +
                                                        attributeName]:
                            logging.error(
                                _("Found {!r} data repeated in {!r} field on {} element in record {}"
                                  ).format(attributeValue, attributeName,
                                           element.tag, j))
                        uniqueDict["Attribute_" +
                                   attributeName].append(attributeValue)

                # Get the sourceComponent to use as a record ID
                ID = element.find("sourceComponent").text

                # Check compulsory elements
                for elementName in self._compulsoryElements:
                    foundElement = element.find(elementName)
                    if foundElement is None:
                        logging.error(
                            _("Compulsory {!r} element is missing in record with ID {!r} (record {})"
                              ).format(elementName, ID, j))
                    else:
                        BibleOrgSysGlobals.checkXMLNoTail(
                            foundElement,
                            foundElement.tag + " in " + element.tag)
                        BibleOrgSysGlobals.checkXMLNoAttributes(
                            foundElement,
                            foundElement.tag + " in " + element.tag)
                        #BibleOrgSysGlobals.checkXMLNoSubelements( foundElement, foundElement.tag + " in " + element.tag )
                        if not foundElement.text:
                            logging.warning(
                                _("Compulsory {!r} element is blank in record with ID {!r} (record {})"
                                  ).format(elementName, ID, j))

                # Check optional elements
                for elementName in self._optionalElements:
                    foundElement = element.find(elementName)
                    if foundElement is not None:
                        BibleOrgSysGlobals.checkXMLNoTail(
                            foundElement,
                            foundElement.tag + " in " + element.tag)
                        BibleOrgSysGlobals.checkXMLNoAttributes(
                            foundElement,
                            foundElement.tag + " in " + element.tag)
                        BibleOrgSysGlobals.checkXMLNoSubelements(
                            foundElement,
                            foundElement.tag + " in " + element.tag)
                        if not foundElement.text:
                            logging.warning(
                                _("Optional {!r} element is blank in record with ID {!r} (record {})"
                                  ).format(elementName, ID, j))

                # Check for unexpected additional elements
                for subelement in element:
                    if subelement.tag not in self._compulsoryElements and subelement.tag not in self._optionalElements:
                        logging.warning(
                            _("Additional {!r} element ({!r}) found in record with ID {!r} (record {})"
                              ).format(subelement.tag, subelement.text, ID, j))

                # Check the elements that must contain unique information (in that particular element -- doesn't check across different elements)
                for elementName in self._uniqueElements:
                    if element.find(elementName) is not None:
                        text = element.find(elementName).text
                        if text in uniqueDict["Element_" + elementName]:
                            logging.error(
                                _("Found {!r} data repeated in {!r} element in record with ID {!r} (record {})"
                                  ).format(text, elementName, ID, j))
                        uniqueDict["Element_" + elementName].append(text)
            else:
                logging.warning(
                    _("Unexpected element: {} in record {}").format(
                        element.tag, j))
            if element.tail is not None and element.tail.strip():
                logging.error(
                    _("Unexpected {!r} tail data after {} element in record {}"
                      ).format(element.tail, element.tag, j))
        if self._XMLTree.tail is not None and self._XMLTree.tail.strip():
            logging.error(
                _("Unexpected {!r} tail data after {} element").format(
                    self._XMLTree.tail, self._XMLTree.tag))
    def __validateSystem(self, bookOrderTree, systemName):
        """
        Do a semi-automatic check of the XML file validity.

        Iterates over the direct children of bookOrderTree and reports
        anything unexpected through the logging module (structural checks
        are delegated to the BibleOrgSysGlobals.checkXMLNo* helpers).

        Parameters:
            bookOrderTree: parsed XML element whose children are the records
                to check (asserted to be truthy).
            systemName: name of the system being checked; only used to label
                some of the log messages.

        For each child whose tag is self.mainElementTag this checks:
            - the "id" attribute counts upwards from 1 without gaps
            - the record text is not repeated in an earlier record
            - compulsory / optional / unexpected attributes and subelements
            - values of self.uniqueAttributes / self.uniqueElements are not
              repeated in an earlier record
        Any other child is reported as an unexpected element.
        """
        assert bookOrderTree

        # One set of already-seen values per field that must be unique
        seenValues = {}
        for elementName in self.uniqueElements:
            seenValues["Element_" + elementName] = set()
        for attributeName in self.uniqueAttributes:
            seenValues["Attribute_" + attributeName] = set()
        # Record texts are tracked separately so that a text can never
        #   collide with one of the "Element_..."/"Attribute_..." keys above
        seenTexts = set()

        expectedID = 1
        for k, element in enumerate(bookOrderTree):
            if element.tag != self.mainElementTag:
                logging.warning(
                    _("Unexpected element: {} in record {}").format(
                        element.tag, k))
                continue

            BibleOrgSysGlobals.checkXMLNoTail(element, element.tag)
            if not self.compulsoryAttributes and not self.optionalAttributes:
                BibleOrgSysGlobals.checkXMLNoAttributes(element, element.tag)
            if not self.compulsoryElements and not self.optionalElements:
                BibleOrgSysGlobals.checkXMLNoSubelements(element, element.tag)

            # Check ascending ID field
            ID = element.get("id")
            try:
                intID = int(ID)
            except (TypeError, ValueError):
                # Missing (None) or non-numeric id: report it and carry on
                #   validating instead of aborting the whole check
                logging.error(
                    _("Missing or non-numeric ID {!r} in record {} for {}"
                      ).format(ID, k, systemName))
            else:
                if intID != expectedID:
                    logging.error(
                        _("ID numbers out of sequence in record {} (got {} when expecting {}) for {}"
                          ).format(k, intID, expectedID, systemName))
            expectedID += 1

            # Check that this is unique
            if element.text:
                if element.text in seenTexts:
                    logging.error(
                        _("Found {!r} data repeated in {!r} element in record with ID {!r} (record {}) for {}"
                          ).format(element.text, element.tag, ID, k,
                                   systemName))
                seenTexts.add(element.text)

            # Check compulsory attributes on this main element
            #   (a missing attribute is deliberately reported as both missing and blank, as before)
            for attributeName in self.compulsoryAttributes:
                attributeValue = element.get(attributeName)
                if attributeValue is None:
                    logging.error(
                        _("Compulsory {!r} attribute is missing from {} element in record {}"
                          ).format(attributeName, element.tag, k))
                if not attributeValue:
                    logging.warning(
                        _("Compulsory {!r} attribute is blank on {} element in record {}"
                          ).format(attributeName, element.tag, k))

            # Check optional attributes on this main element
            for attributeName in self.optionalAttributes:
                attributeValue = element.get(attributeName)
                if attributeValue is not None and not attributeValue:
                    logging.warning(
                        _("Optional {!r} attribute is blank on {} element in record {}"
                          ).format(attributeName, element.tag, k))

            # Check for unexpected additional attributes on this main element
            for attributeName in element.keys():
                attributeValue = element.get(attributeName)
                if attributeName not in self.compulsoryAttributes and attributeName not in self.optionalAttributes:
                    logging.warning(
                        _("Additional {!r} attribute ({!r}) found on {} element in record {}"
                          ).format(attributeName, attributeValue,
                                   element.tag, k))

            # Check the attributes that must contain unique information (in that particular field -- doesn't check across different attributes)
            for attributeName in self.uniqueAttributes:
                attributeValue = element.get(attributeName)
                if attributeValue is not None:
                    seen = seenValues["Attribute_" + attributeName]
                    if attributeValue in seen:
                        logging.error(
                            _("Found {!r} data repeated in {!r} field on {} element in record {}"
                              ).format(attributeValue, attributeName,
                                       element.tag, k))
                    seen.add(attributeValue)

            # Check compulsory elements
            for elementName in self.compulsoryElements:
                foundElement = element.find(elementName)
                if foundElement is None:
                    logging.error(
                        _("Compulsory {!r} element is missing in record with ID {!r} (record {})"
                          ).format(elementName, ID, k))
                elif not foundElement.text:  # Only look at .text if the element actually exists
                    logging.warning(
                        _("Compulsory {!r} element is blank in record with ID {!r} (record {})"
                          ).format(elementName, ID, k))

            # Check optional elements
            for elementName in self.optionalElements:
                foundElement = element.find(elementName)
                if foundElement is not None and not foundElement.text:
                    logging.warning(
                        _("Optional {!r} element is blank in record with ID {!r} (record {})"
                          ).format(elementName, ID, k))

            # Check for unexpected additional elements
            for subelement in element:
                if subelement.tag not in self.compulsoryElements and subelement.tag not in self.optionalElements:
                    logging.warning(
                        _("Additional {!r} element ({!r}) found in record with ID {!r} (record {})"
                          ).format(subelement.tag, subelement.text, ID, k))

            # Check the elements that must contain unique information (in that particular element -- doesn't check across different elements)
            for elementName in self.uniqueElements:
                foundElement = element.find(elementName)
                if foundElement is not None:
                    text = foundElement.text
                    seen = seenValues["Element_" + elementName]
                    if text in seen:
                        logging.error(
                            _("Found {!r} data repeated in {!r} element in record with ID {!r} (record {})"
                              ).format(text, elementName, ID, k))
                    seen.add(text)
示例#7
0
    def __validateSystem( self, systemName ):
        """
        Checks for basic formatting/content errors in a Bible book name system.

        Reads the tree and language code stored in self.__XMLSystems[systemName]
            and reports anything unexpected through the logging module.

        self.mainElementTags lists the accepted record tags. The compulsory/optional/unique
            attribute and element containers are indexed by the position of the record's tag
            in that list, so each record type is checked against its own settings.

        Parameter:
            systemName: key into self.__XMLSystems; also used to label the log messages.
        """
        assert systemName
        assert self.__XMLSystems[systemName]['tree']

        if len(self.__XMLSystems[systemName]["languageCode"]) != 3:
            logging.error( _("Couldn't find 3-letter language code in {!r} book names system").format( systemName ) )
        #if self.__ISOLanguages and not self.__ISOLanguages.isValidLanguageCode( self.__XMLSystems[systemName]["languageCode"] ): # Check that we have a valid language code
            #logging.error( _("Unrecognized {!r} ISO-639-3 language code in {!r} book names system").format( self.__XMLSystems[systemName]["languageCode"], systemName ) )

        # One set of already-seen values per (record type, field) that must be unique
        seenValues = {}
        for index, _mainElementTag in enumerate( self.mainElementTags ):
            for elementName in self.uniqueElements[index]: seenValues["Element_"+str(index)+"_"+elementName] = set()
            for attributeName in self.uniqueAttributes[index]: seenValues["Attribute_"+str(index)+"_"+attributeName] = set()

        for k,element in enumerate(self.__XMLSystems[systemName]['tree']):
            if element.tag not in self.mainElementTags:
                logging.warning( _("Unexpected element: {} in record {} in {}").format( element.tag, k, systemName ) )
                continue

            BibleOrgSysGlobals.checkXMLNoText( element, element.tag )
            BibleOrgSysGlobals.checkXMLNoTail( element, element.tag )

            # Select the settings that belong to this particular record type
            index = self.mainElementTags.index( element.tag )
            compulsoryAttributes, optionalAttributes = self.compulsoryAttributes[index], self.optionalAttributes[index]
            compulsoryElements, optionalElements = self.compulsoryElements[index], self.optionalElements[index]

            # These tests must look at the settings for THIS record type
            #   (testing the outer containers says nothing about the current tag)
            if not compulsoryAttributes and not optionalAttributes: BibleOrgSysGlobals.checkXMLNoAttributes( element, element.tag )
            if not compulsoryElements and not optionalElements: BibleOrgSysGlobals.checkXMLNoSubelements( element, element.tag )

            # Check compulsory attributes on this main element
            #   (a missing attribute is deliberately reported as both missing and blank, as before)
            for attributeName in compulsoryAttributes:
                attributeValue = element.get( attributeName )
                if attributeValue is None:
                    logging.error( _("Compulsory {!r} attribute is missing from {} element in record {} in {}").format( attributeName, element.tag, k, systemName ) )
                if not attributeValue:
                    logging.warning( _("Compulsory {!r} attribute is blank on {} element in record {} in {}").format( attributeName, element.tag, k, systemName ) )

            # Check optional attributes on this main element
            for attributeName in optionalAttributes:
                attributeValue = element.get( attributeName )
                if attributeValue is not None and not attributeValue:
                    logging.warning( _("Optional {!r} attribute is blank on {} element in record {} in {}").format( attributeName, element.tag, k, systemName ) )

            # Check for unexpected additional attributes on this main element
            for attributeName in element.keys():
                attributeValue = element.get( attributeName )
                if attributeName not in compulsoryAttributes and attributeName not in optionalAttributes:
                    logging.warning( _("Additional {!r} attribute ({!r}) found on {} element in record {} in {}").format( attributeName, attributeValue, element.tag, k, systemName ) )

            # Check the attributes that must contain unique information (in that particular field -- doesn't check across different attributes)
            for attributeName in self.uniqueAttributes[index]:
                attributeValue = element.get( attributeName )
                if attributeValue is not None:
                    seen = seenValues["Attribute_"+str(index)+"_"+attributeName]
                    if attributeValue in seen:
                        logging.error( _("Found {!r} data repeated in {!r} field on {} element in record {} in {}").format( attributeValue, attributeName, element.tag, k, systemName ) )
                    seen.add( attributeValue )

            # Check compulsory elements
            for elementName in compulsoryElements:
                foundElement = element.find( elementName )
                if foundElement is None:
                    logging.error( _("Compulsory {!r} element is missing (record {}) in {}").format( elementName, k, systemName ) )
                elif not foundElement.text: # Only look at .text if the element actually exists
                    logging.warning( _("Compulsory {!r} element is blank (record {}) in {}").format( elementName, k, systemName ) )

            # Check optional elements
            for elementName in optionalElements:
                foundElement = element.find( elementName )
                if foundElement is not None and not foundElement.text:
                    logging.warning( _("Optional {!r} element is blank (record {}) in {}").format( elementName, k, systemName ) )

            # Check for unexpected additional elements
            for subelement in element:
                if subelement.tag not in compulsoryElements and subelement.tag not in optionalElements:
                    logging.warning( _("Additional {!r} element ({!r}) found (record {}) in {} {}").format( subelement.tag, subelement.text, k, systemName, element.tag ) )

            # Check the elements that must contain unique information (in that particular element -- doesn't check across different elements)
            for elementName in self.uniqueElements[index]:
                foundElement = element.find( elementName )
                if foundElement is not None:
                    text = foundElement.text
                    seen = seenValues["Element_"+str(index)+"_"+elementName]
                    if text in seen:
                        # Repeats inside BibleDivisionNames records are only logged at info level
                        myLogging = logging.info if element.tag == 'BibleDivisionNames' else logging.error
                        myLogging( _("Found {!r} data repeated in {!r} element (record {}) in {}").format( text, elementName, k, systemName ) )
                    seen.add( text )
示例#8
0
    def __validateAndExtractChapter(self, BBB: str, thisBook, chapter):
        """
        Check/validate and extract chapter data from the given XML book record
            finding and saving chapter numbers and
            finding and saving verse elements.

        Parameters:
            BBB: book code -- used here only in log messages and location strings.
            thisBook: receives the extracted content through thisBook.addLine( marker, text ).
            chapter: the XML chapter element whose attributes and child elements are read.

        Lines added to thisBook:
            'c' with the chapter number (if the chapter has an 'n' attribute), then
            for each verse with text either a single 'v' line, or (if the text
            contains newlines) 'q1' + 'v' for the first piece and a 'q1' line
            for each following piece.

        Problems (unexpected attributes or elements, missing numbers, empty
            verses) are reported through the logging module.
        """

        vPrint('Verbose', debuggingThisModule, _("Validating XML chapter…"))

        # Process the div attributes first
        chapterNumber = numVerses = None
        for attrib, value in chapter.items():
            if attrib == "n":
                chapterNumber = value
            elif attrib == "VERSES":
                numVerses = value  # Recognised (so no warning is given) but not used any further here
            else:
                logging.warning(
                    "Unprocessed {!r} attribute ({}) in chapter element".
                    format(attrib, value))
        if chapterNumber:
            #dPrint( 'Quiet', debuggingThisModule, BBB, 'c', chapterNumber )
            chapterNumber = chapterNumber.replace(
                'of Solomon ', '')  # Fix a mistake in the Chinese_SU module
            thisBook.addLine('c', chapterNumber)
        else:
            logging.error(
                "Missing 'n' attribute in chapter element for {}".format(BBB))

        for element in chapter:
            if element.tag != OpenSongXMLBible.verseTag:
                logging.error("Expected to find {!r} but got {!r}".format(
                    OpenSongXMLBible.verseTag, element.tag))
                continue

            sublocation = "verse in {} {}".format(BBB, chapterNumber)
            BibleOrgSysGlobals.checkXMLNoTail(element, sublocation, 'l5ks')
            verseNumber = toVerseNumber = None
            for attrib, value in element.items():
                if attrib == "n":
                    verseNumber = value
                elif attrib == "t":
                    toVerseNumber = value  # Recognised (so no warning is given) but not used any further here
                else:
                    logging.warning(
                        "Unprocessed {!r} attribute ({}) in verse element".
                        format(attrib, value))
            if BibleOrgSysGlobals.debugFlag: assert verseNumber
            if verseNumber is None:
                # Without a number the 'v' line cannot be built, so report and skip this verse
                logging.error(
                    "Missing 'n' attribute in {}".format(sublocation))
                continue
            #thisBook.addLine( 'v', verseNumber )

            vText = element.text if element.text else ''
            for subelement in element:
                sub2location = "{} in {}".format(subelement.tag,
                                                 sublocation)
                BibleOrgSysGlobals.checkXMLNoAttributes(
                    subelement, sub2location, 'ks03')
                BibleOrgSysGlobals.checkXMLNoSubelements(
                    subelement, sub2location, 'ks05')
                if subelement.tag == 'i':
                    # .text/.tail are None when absent -- substitute '' so that
                    #   the literal word 'None' never ends up in the verse text
                    vText += '\\it {}\\it*{}'.format(
                        subelement.text or '', subelement.tail or '')
                else:
                    logging.error(
                        "Expected to find 'i' but got {!r}".format(
                            subelement.tag))
            vText += element.tail if element.tail else ''

            if not vText:
                logging.warning("{} {}:{} has no text".format(
                    BBB, chapterNumber, verseNumber))
                continue
            #dPrint( 'Quiet', debuggingThisModule, 'vText1', vText )

            # This is the main text of the verse (follows the verse milestone)
            #dPrint( 'Quiet', debuggingThisModule, "{} {}:{} {!r}".format( BBB, chapterNumber, verseNumber, vText ) )
            if '\n' in vText:  # This is how they represent poetry
                #dPrint( 'Quiet', debuggingThisModule, "vText", repr(vText), repr(element.text) )
                for j, textBit in enumerate(vText.split('\n')):
                    if j == 0:
                        thisBook.addLine('q1', '')
                        thisBook.addLine('v',
                                         verseNumber + ' ' + textBit)
                    else:
                        thisBook.addLine('q1', textBit)
            else:  # Just one verse line
                thisBook.addLine('v', verseNumber + ' ' + vText)
            #dPrint( 'Quiet', debuggingThisModule, 'vText2', vText )
示例#9
0
    def validateEntry(self, entry) -> None:
        """
        Check/validate the given Strongs Greek lexicon entry.

        Adds good entries to self.StrongsEntries.

        Parameter:
            entry: the XML 'entry' element. Its 'strongs' attribute (checked in debug
                mode to be five digits) is used for location strings, and its
                child elements are processed in document order.

        The dictionary stored for the entry may contain:
            'word': (greek, translit, beta) from the first 'greek' element
            'pronunciation': 'strongs' attribute of the leading 'pronunciation' element
            'see': list of 'G'/'H' + number strings from 'see' elements
            'Entry': the remaining text/markup flattened into one string
        It is stored under the text of the 'strongs' child element (i.e., not
            under the five-digit attribute value).

        An entry without a 'strongs' attribute, or without a usable 'strongs'
            child element, is reported through logging and not stored.
        """
        if BibleOrgSysGlobals.debugFlag: assert entry.tag == 'entry'
        BibleOrgSysGlobals.checkXMLNoText(entry, entry.tag, "na19")
        BibleOrgSysGlobals.checkXMLNoTail(entry, entry.tag, "kaq9")

        # Process the entry attributes first
        strongs5 = None
        for attrib, value in entry.items():
            if attrib == 'strongs':
                strongs5 = value
                #dPrint( 'Never', debuggingThisModule, f"Validating {strongs5} entry…" )
            else:
                logging.warning(
                    "Unprocessed {!r} attribute ({}) in main entry element".
                    format(attrib, value))
        if strongs5 is None:
            # Everything below needs the number (for int() and for location strings)
            logging.error("Missing 'strongs' attribute in main entry element")
            return
        if BibleOrgSysGlobals.debugFlag:
            assert len(strongs5) == 5 and strongs5.isdigit()

        entryResults = {}
        entryString = ""
        gettingEssentials = True
        strongs = None  # Set from the 'strongs' child element; used as the storage key at the end
        for j, element in enumerate(entry):
            #dPrint( 'Quiet', debuggingThisModule, strongs5, j, element.tag, repr(entryString) )
            if element.tag == "strongs":
                if BibleOrgSysGlobals.debugFlag:
                    assert gettingEssentials and j == 0 and element.text
                BibleOrgSysGlobals.checkXMLNoAttributes(
                    element, element.tag, "md3d")
                # NOTE(review): no number can be both below 3203 and above 3302, so this
                #   tail check never runs -- possibly "outside 3203..3302" was meant.
                #   Left unchanged until the intended range is confirmed.
                if strongs5 != '02717' and (3203 > int(strongs5) > 3302):
                    BibleOrgSysGlobals.checkXMLNoTail(element, element.tag,
                                                      "f3g7")
                BibleOrgSysGlobals.checkXMLNoSubelements(
                    element, element.tag, "m56g")
                strongs = element.text
                if BibleOrgSysGlobals.debugFlag:
                    assert strongs5.endswith(strongs)
                if element.tail and element.tail.strip():
                    entryString += element.tail.strip()
            elif element.tag == "greek":
                location = "greek in Strongs " + strongs5
                BibleOrgSysGlobals.checkXMLNoText(element, location, "jke0")
                #BibleOrgSysGlobals.checkXMLNoTail( element, location, "ks24" )
                BibleOrgSysGlobals.checkXMLNoSubelements(
                    element, location, "df35")
                # Process the attributes
                translit = greek = beta = None
                for attrib, value in element.items():
                    if attrib == "translit": translit = value
                    elif attrib == "unicode": greek = value
                    elif attrib == "BETA": beta = value
                    else:
                        logging.warning(
                            "scs4 Unprocessed {!r} attribute ({}) in {}".
                            format(attrib, value, location))
                if BibleOrgSysGlobals.debugFlag:
                    assert greek and translit and beta
                if 'word' not in entryResults:  # This is the first/main entry
                    if BibleOrgSysGlobals.debugFlag:
                        assert gettingEssentials and j == 1
                    BibleOrgSysGlobals.checkXMLNoTail(element, location,
                                                      "ks24")
                    entryResults['word'] = (greek, translit, beta)
                else:
                    #dPrint( 'Quiet', debuggingThisModule, "Have multiple greek entries in " + strongs5 )
                    if BibleOrgSysGlobals.debugFlag: assert j > 2
                    gettingEssentials = False
                    entryString += ' ' + BibleOrgSysGlobals.getFlattenedXML(
                        element, strongs5)  #.replace( '\n', '' )
            elif element.tag == "pronunciation":
                location = "pronunciation in Strongs " + strongs5
                BibleOrgSysGlobals.checkXMLNoText(element, location, "iw9k")
                BibleOrgSysGlobals.checkXMLNoSubelements(
                    element, location, "0s20")
                # Process the attributes
                pronunciation = None
                for attrib, value in element.items():
                    if attrib == "strongs": pronunciation = value
                    else:
                        logging.warning(
                            "scs4 Unprocessed {!r} attribute ({}) in {}".
                            format(attrib, value, location))
                if gettingEssentials:
                    #BibleOrgSysGlobals.checkXMLNoTail( element, location, "kd02" )
                    if BibleOrgSysGlobals.debugFlag:
                        assert j == 2
                        assert pronunciation
                        assert 'pronunciation' not in entryResults
                    entryResults['pronunciation'] = pronunciation
                else:
                    if BibleOrgSysGlobals.debugFlag:
                        assert j > 2 and not gettingEssentials
                    if element.tail and element.tail.strip():
                        entryString += element.tail.strip().replace('\n', '')
            elif element.tag == "strongs_derivation":
                location = "strongs_derivation in Strongs " + strongs5
                BibleOrgSysGlobals.checkXMLNoAttributes(
                    element, location, "jke0")
                BibleOrgSysGlobals.checkXMLNoTail(element, location, "ks24")
                derivation = BibleOrgSysGlobals.getFlattenedXML(
                    element, strongs5).replace('\n', '')
                #dPrint( 'Quiet', debuggingThisModule, strongs5, "derivation", repr(derivation) )
                if BibleOrgSysGlobals.debugFlag:
                    assert derivation and '\t' not in derivation and '\n' not in derivation
                entryString += derivation
            elif element.tag == "strongs_def":
                location = "strongs_def in Strongs " + strongs5
                BibleOrgSysGlobals.checkXMLNoAttributes(
                    element, location, "jke0")
                BibleOrgSysGlobals.checkXMLNoTail(element, location, "jd28")
                definition = BibleOrgSysGlobals.getFlattenedXML(
                    element, strongs5).replace('\n', '')
                #dPrint( 'Quiet', debuggingThisModule, strongs5, "definition", repr(definition) )
                if BibleOrgSysGlobals.debugFlag:
                    assert definition and '\t' not in definition and '\n' not in definition
                entryString += definition
            elif element.tag == "kjv_def":
                location = "kjv_def in Strongs " + strongs5
                BibleOrgSysGlobals.checkXMLNoAttributes(
                    element, location, "jke0")
                #BibleOrgSysGlobals.checkXMLNoTail( element, location, "8s2s" )
                #BibleOrgSysGlobals.checkXMLNoSubelements( element, location, "dvb2" )
                KJVdefinition = BibleOrgSysGlobals.getFlattenedXML(
                    element, strongs5).replace('\n', '')
                #dPrint( 'Quiet', debuggingThisModule, strongs5, "KJVdefinition", repr(KJVdefinition), repr(entryString) )
                if BibleOrgSysGlobals.debugFlag:
                    assert KJVdefinition and '\t' not in KJVdefinition and '\n' not in KJVdefinition
                entryString += KJVdefinition
            elif element.tag == "strongsref":
                location = "strongsref in Strongs " + strongs5
                BibleOrgSysGlobals.checkXMLNoText(element, location, "kls2")
                BibleOrgSysGlobals.checkXMLNoSubelements(
                    element, location, "ks24")
                strongsRef = BibleOrgSysGlobals.getFlattenedXML(
                    element, strongs5).replace('\n', '')
                if BibleOrgSysGlobals.debugFlag:
                    assert strongsRef and '\t' not in strongsRef and '\n' not in strongsRef
                # Raw strings keep the regex \d from being read as a (invalid) string escape
                strongsRef = re.sub(r'<language="GREEK" strongs="(\d{1,5})">',
                                    r'<StrongsRef>G\1</StrongsRef>',
                                    strongsRef)
                strongsRef = re.sub(r'<strongs="(\d{1,5})" language="GREEK">',
                                    r'<StrongsRef>G\1</StrongsRef>',
                                    strongsRef)
                #strongsRef = re.sub( r'<language="HEBREW" strongs="(\d{1,5})">', r'<StrongsRef>H\1</StrongsRef>', strongsRef )
                #strongsRef = re.sub( r'<strongs="(\d{1,5})" language="HEBREW">', r'<StrongsRef>H\1</StrongsRef>', strongsRef )
                #dPrint( 'Quiet', debuggingThisModule, strongs5, "strongsRef", repr(strongsRef) )
                entryString += ' ' + strongsRef
            elif element.tag == "see":
                location = "see in Strongs " + strongs5
                BibleOrgSysGlobals.checkXMLNoText(element, location, "iw9k")
                BibleOrgSysGlobals.checkXMLNoTail(element, location, "kd02")
                BibleOrgSysGlobals.checkXMLNoSubelements(
                    element, location, "0s20")
                # Process the attributes
                seeLanguage = seeStrongsNumber = None
                for attrib, value in element.items():
                    if attrib == "language": seeLanguage = value
                    elif attrib == "strongs":
                        seeStrongsNumber = value  # Note: No leading zeroes here
                    else:
                        logging.warning(
                            "scs4 Unprocessed {!r} attribute ({}) in {}".
                            format(attrib, value, location))
                if BibleOrgSysGlobals.debugFlag:
                    assert seeLanguage and seeStrongsNumber and seeStrongsNumber.isdigit(
                    )
                    assert seeLanguage in (
                        'GREEK',
                        'HEBREW',
                    )
                entryResults.setdefault('see', []).append((
                    'G' if seeLanguage == 'GREEK' else 'H') + seeStrongsNumber)
            else:
                logging.error(
                    "2d4f Unprocessed {!r} element ({}) in entry".format(
                        element.tag, element.text))

        if entryString:
            #dPrint( 'Quiet', debuggingThisModule, strongs5, "entryString", repr(entryString) )
            if BibleOrgSysGlobals.debugFlag:
                assert '\t' not in entryString and '\n' not in entryString
            # Convert each empty strongsref element (either attribute order) to the compact form
            for pattern, replacement in (
                (r'<strongsref language="GREEK" strongs="(\d{1,5})"></strongsref>',
                 r'<StrongsRef>G\1</StrongsRef>'),
                (r'<strongsref strongs="(\d{1,5})" language="GREEK"></strongsref>',
                 r'<StrongsRef>G\1</StrongsRef>'),
                (r'<strongsref language="HEBREW" strongs="(\d{1,5})"></strongsref>',
                 r'<StrongsRef>H\1</StrongsRef>'),
                (r'<strongsref strongs="(\d{1,5})" language="HEBREW"></strongsref>',
                 r'<StrongsRef>H\1</StrongsRef>'),
            ):
                entryString = re.sub(pattern, replacement, entryString)
            if BibleOrgSysGlobals.debugFlag:
                assert 'strongsref' not in entryString
            entryResults['Entry'] = entryString
        #dPrint( 'Quiet', debuggingThisModule, "entryResults", entryResults )

        if strongs is None:
            # No 'strongs' child element (or it had no text), so there is no key to store under
            logging.error(
                "No usable 'strongs' element found in entry {}".format(
                    strongs5))
            return
        self.StrongsEntries[strongs] = entryResults
示例#10
0
    def _validateSystem(self, punctuationTree, systemName):
        """
        Check/validate the records of one loaded punctuation system tree.

        Every child of punctuationTree whose tag is listed in
        self.mainElementTags is checked against the field lists held on self
        (compulsoryAttributes, optionalAttributes, uniqueAttributes,
        compulsoryElements, optionalElements, uniqueElements).
        Any other child is reported as an unexpected element.

        Parameters:
            punctuationTree: the iterable XML element whose children are the
                records to check (asserted to be truthy).
            systemName: accepted for the caller's benefit; none of the checks
                in this method read it.

        Nothing is returned and nothing is raised for bad data -- all
        findings are reported through the logging module.
        """
        assert punctuationTree

        # One collection of already-seen values per field that must be unique.
        # Sets give the same membership answers as lists, without the linear scan.
        uniqueDict = {}
        for elementName in self.uniqueElements:
            uniqueDict["Element_" + elementName] = set()
        for attributeName in self.uniqueAttributes:
            uniqueDict["Attribute_" + attributeName] = set()

        for k, element in enumerate(punctuationTree):
            if element.tag not in self.mainElementTags:
                logging.warning(
                    _("Unexpected element: {} in record {}").format(
                        element.tag, k))
                continue

            BibleOrgSysGlobals.checkXMLNoTail(element, element.tag)
            if not self.compulsoryAttributes and not self.optionalAttributes:
                BibleOrgSysGlobals.checkXMLNoAttributes(element, element.tag)
            if not self.compulsoryElements and not self.optionalElements:
                BibleOrgSysGlobals.checkXMLNoSubelements(element, element.tag)

            # The element-level messages below name the record by an ID.
            # No ID is defined anywhere in this method (the name used to be
            # undefined, so reaching any of those messages raised NameError),
            # so the record's own tag is used to identify it.
            recordID = element.tag

            # Check compulsory attributes on this main element
            for attributeName in self.compulsoryAttributes:
                attributeValue = element.get(attributeName)
                if attributeValue is None:
                    logging.error(
                        _("Compulsory {!r} attribute is missing from {} element in record {}"
                          ).format(attributeName, element.tag, k))
                elif not attributeValue:
                    # Only a present-but-empty attribute is "blank"; a missing
                    # one has already been reported above.
                    logging.warning(
                        _("Compulsory {!r} attribute is blank on {} element in record {}"
                          ).format(attributeName, element.tag, k))

            # Check optional attributes on this main element
            for attributeName in self.optionalAttributes:
                attributeValue = element.get(attributeName)
                if attributeValue is not None and not attributeValue:
                    logging.warning(
                        _("Optional {!r} attribute is blank on {} element in record {}"
                          ).format(attributeName, element.tag, k))

            # Check for unexpected additional attributes on this main element
            for attributeName, attributeValue in element.items():
                if attributeName not in self.compulsoryAttributes and attributeName not in self.optionalAttributes:
                    logging.warning(
                        _("Additional {!r} attribute ({!r}) found on {} element in record {}"
                          ).format(attributeName, attributeValue,
                                   element.tag, k))

            # Check the attributes that must contain unique information
            # (in that particular field -- doesn't check across different attributes)
            for attributeName in self.uniqueAttributes:
                attributeValue = element.get(attributeName)
                if attributeValue is not None:
                    seenValues = uniqueDict["Attribute_" + attributeName]
                    if attributeValue in seenValues:
                        logging.error(
                            _("Found {!r} data repeated in {!r} field on {} element in record {}"
                              ).format(attributeValue, attributeName,
                                       element.tag, k))
                    seenValues.add(attributeValue)

            # Check compulsory elements
            for elementName in self.compulsoryElements:
                subelement = element.find(elementName)
                if subelement is None:
                    logging.error(
                        _("Compulsory {!r} element is missing in record with ID {!r} (record {})"
                          ).format(elementName, recordID, k))
                elif not subelement.text:
                    # Guarded by the None test above: a missing element has no
                    # .text to inspect.
                    logging.warning(
                        _("Compulsory {!r} element is blank in record with ID {!r} (record {})"
                          ).format(elementName, recordID, k))

            # Check optional elements
            for elementName in self.optionalElements:
                subelement = element.find(elementName)
                if subelement is not None and not subelement.text:
                    logging.warning(
                        _("Optional {!r} element is blank in record with ID {!r} (record {})"
                          ).format(elementName, recordID, k))

            # Check for unexpected additional elements
            for subelement in element:
                if subelement.tag not in self.compulsoryElements and subelement.tag not in self.optionalElements:
                    logging.warning(
                        _("Additional {!r} element ({!r}) found in record with ID {!r} (record {})"
                          ).format(subelement.tag, subelement.text,
                                   recordID, k))

            # Check the elements that must contain unique information
            # (in that particular element -- doesn't check across different elements)
            for elementName in self.uniqueElements:
                subelement = element.find(elementName)
                if subelement is not None:
                    text = subelement.text
                    seenTexts = uniqueDict["Element_" + elementName]
                    if text in seenTexts:
                        logging.error(
                            _("Found {!r} data repeated in {!r} element in record with ID {!r} (record {})"
                              ).format(text, elementName, recordID, k))
                    seenTexts.add(text)