Exemplos de ElementTree.remove em Python, exemplos de xml.etree.ElementTree.ElementTree.remove em Python

Exemplo n.º 1

0

Exibir arquivo

def _remove_old_items(channel: ElementTree) -> None:
    """Trim the ``item`` children of *channel* down to MAX_KEEP_ITEMS.

    The surplus is taken from the front of the ``findall("item")`` result,
    i.e. the items that come first in document order are the ones removed.
    Nothing happens when the channel already holds MAX_KEEP_ITEMS or fewer.
    """
    all_items = channel.findall("item")
    surplus = len(all_items) - MAX_KEEP_ITEMS
    if surplus <= 0:
        return

    for stale_item in all_items[:surplus]:
        channel.remove(stale_item)

Exemplo n.º 2

0

Exibir arquivo

Arquivo: file_operations.py Projeto: uva-bodysim/BodySim

def remove_simulation(session_path, simulation_name):
    """Delete one simulation from a session, both in the XML and on disk.

    Every ``simulation`` child of the session file (``session_path + '.xml'``)
    whose ``name`` text equals *simulation_name* is removed, the file is
    re-indented and rewritten, and the folder
    ``session_path/simulation_name`` is deleted recursively.

    Returns the ``in_batch`` attribute (as an int) of the last matching
    simulation, or ``SimulationState.Saved`` when nothing matched.
    """
    deleted_state = SimulationState.Saved
    xml_path = session_path + '.xml'
    root = ET().parse(xml_path)

    # findall() hands back a list, so removing children while looping is safe.
    for node in root.findall('simulation'):
        if node.find('name').text != simulation_name:
            continue
        deleted_state = int(node.attrib['in_batch'])
        root.remove(node)

    indent(root)

    with open(xml_path, 'wb') as session_file:
        session_file.write(tostring(root))

    simulation_folder = session_path + os.sep + simulation_name
    shutil.rmtree(simulation_folder)

    return deleted_state

Exemplo n.º 3

0

Exibir arquivo

Arquivo: file_operations.py Projeto: uva-bodysim/BodySim

def remove_simulation(session_path, simulation_name):
    """Delete one simulation from a session, both in the XML and on disk.

    Every ``simulation`` child of the session file (``session_path + '.xml'``)
    whose ``name`` text equals *simulation_name* is removed, the file is
    re-indented and rewritten, and the folder
    ``session_path/simulation_name`` is deleted recursively.

    Returns the ``in_batch`` attribute (as an int) of the last matching
    simulation, or ``SimulationState.Saved`` when nothing matched.
    """
    deleted_state = SimulationState.Saved
    xml_path = session_path + '.xml'
    root = ET().parse(xml_path)

    # findall() hands back a list, so removing children while looping is safe.
    for node in root.findall('simulation'):
        if node.find('name').text != simulation_name:
            continue
        deleted_state = int(node.attrib['in_batch'])
        root.remove(node)

    indent(root)

    with open(xml_path, 'wb') as session_file:
        session_file.write(tostring(root))

    simulation_folder = session_path + os.sep + simulation_name
    shutil.rmtree(simulation_folder)

    return deleted_state

Exemplo n.º 4

0

Exibir arquivo

Arquivo: BibleReferencesLinksConverter.py Projeto: janfri/BibleOrgSys

class BibleReferencesLinksConverter:
    """
    Class for reading, validating, and converting BibleReferencesLinks.
    This is only intended as a transitory class (used at start-up).
    The BibleReferencesLinks class has functions more generally useful.
    """
    def __init__(
            self):  # We can't give this parameters because of the singleton
        """
        Constructor: expects the filepath of the source XML file.
        Loads (and crudely validates the XML file) into an element tree.
        """
        self._filenameBase = 'BibleReferencesLinks'

        # These fields are used for parsing the XML
        self._treeTag = 'BibleReferencesLinks'
        self._headerTag = 'header'
        self._mainElementTag = 'BibleReferenceLinks'

        # These fields are used for automatically checking/validating the XML
        self._compulsoryAttributes = ()
        self._optionalAttributes = ()
        self._uniqueAttributes = self._compulsoryAttributes + self._optionalAttributes
        self._compulsoryElements = (
            'sourceReference',
            'sourceComponent',
            'BibleReferenceLink',
        )
        self._optionalElements = ()
        self._uniqueElements = ('sourceReference')

        # These are fields that we will fill later
        self._XMLheader, self._XMLTree = None, None
        self.__DataList = {}  # Used for import
        self.titleString = self.PROGRAM_VERSION = self.dateString = ''

    # end of BibleReferencesLinksConverter.__init__

    def loadAndValidate(self, XMLFileOrFilepath=None):
        """
        Loads (and crudely validates the XML file) into an element tree.
            Allows the filepath of the source XML file to be specified, otherwise uses the default.
        """
        if self._XMLTree is None:  # We mustn't have already have loaded the data
            if XMLFileOrFilepath is None:
                # XMLFileOrFilepath = BibleOrgSysGlobals.BOS_DATAFILES_FOLDERPATH.joinpath( self._filenameBase + '.xml' ) # Relative to module, not cwd
                import importlib.resources  # From Python 3.7 onwards -- handles zipped resources also
                XMLFileOrFilepath = importlib.resources.open_text(
                    'BibleOrgSys.DataFiles', self._filenameBase + '.xml')

            self.__load(XMLFileOrFilepath)
            if BibleOrgSysGlobals.strictCheckingFlag:
                self.__validate()
        else:  # The data must have been already loaded
            if XMLFileOrFilepath is not None and XMLFileOrFilepath != self.__XMLFileOrFilepath:
                logging.error(
                    _("Bible references links are already loaded -- your different filepath of {!r} was ignored"
                      ).format(XMLFileOrFilepath))
        return self

    # end of BibleReferencesLinksConverter.loadAndValidate

    def __load(self, XMLFileOrFilepath):
        """
        Load the source XML file and remove the header from the tree.
        Also, extracts some useful elements from the header element.

        The parsed root element is stored in self._XMLTree.
        If the first child of the root is the header element, it is detached
            from the tree and stored, and the version, date and title of its
            single "work" child are copied into self.PROGRAM_VERSION,
            self.dateString and self.titleString.
        Structural problems are logged rather than raised.
        """
        assert XMLFileOrFilepath
        self.__XMLFileOrFilepath = XMLFileOrFilepath
        assert self._XMLTree is None or len(
            self._XMLTree) == 0  # Make sure we're not doing this twice

        vPrint(
            'Info', debuggingThisModule,
            _("Loading BibleReferencesLinks XML file from {!r}…").format(
                self.__XMLFileOrFilepath))
        self._XMLTree = ElementTree().parse(self.__XMLFileOrFilepath)
        # Fail here if we didn't load anything at all
        #   (explicit length test because the truth value of an Element is deprecated)
        assert self._XMLTree is not None and len(self._XMLTree) > 0

        if self._XMLTree.tag == self._treeTag:
            header = self._XMLTree[0]
            if header.tag == self._headerTag:
                # Fill the field declared in __init__ and keep the public
                #   XMLheader name that this method has always set
                self._XMLheader = self.XMLheader = header
                self._XMLTree.remove(header)
                BibleOrgSysGlobals.checkXMLNoText(header, 'header')
                BibleOrgSysGlobals.checkXMLNoTail(header, 'header')
                BibleOrgSysGlobals.checkXMLNoAttributes(header, 'header')
                if len(header) > 1:
                    logging.info(_("Unexpected elements in header"))
                elif len(header) == 0:
                    logging.info(_("Missing work element in header"))
                else:
                    work = header[0]
                    BibleOrgSysGlobals.checkXMLNoText(work, "work in header")
                    BibleOrgSysGlobals.checkXMLNoTail(work, "work in header")
                    BibleOrgSysGlobals.checkXMLNoAttributes(
                        work, "work in header")
                    if work.tag == "work":
                        self.PROGRAM_VERSION = work.find('version').text
                        self.dateString = work.find('date').text
                        self.titleString = work.find('title').text
                    else:
                        logging.warning(_("Missing work element in header"))
            else:
                # Translate the template first, then fill it in, so that the
                #   translation lookup sees the untouched message
                logging.warning(
                    _("Missing header element (looking for {!r} tag)").format(
                        self._headerTag))
            # This tail check applies to the first child, whether or not it was the header
            if header.tail is not None and header.tail.strip():
                logging.error(
                    _("Unexpected {!r} tail data after header").format(
                        header.tail))
        else:
            logging.error(
                _("Expected to load {!r} but got {!r}").format(
                    self._treeTag, self._XMLTree.tag))

    # end of BibleReferencesLinksConverter.__load

    def __validate(self):
        """
        Check/validate the loaded data.

        Walks every child of the loaded tree and logs (never raises for)
            missing, blank, repeated or unexpected attributes and subelements,
            using the compulsory/optional/unique tables set up in __init__.
        A record that lacks its sourceComponent element is reported through
            the normal compulsory-element check, with None as its record ID.
        """
        # Explicit length test because the truth value of an Element is deprecated
        assert self._XMLTree is not None and len(self._XMLTree) > 0

        # One list of already-seen values per field that must be unique
        uniqueDict = {}
        for elementName in self._uniqueElements:
            uniqueDict["Element_" + elementName] = []
        for attributeName in self._uniqueAttributes:
            uniqueDict["Attribute_" + attributeName] = []

        for j, element in enumerate(self._XMLTree):
            if element.tag == self._mainElementTag:
                BibleOrgSysGlobals.checkXMLNoText(element, element.tag)
                BibleOrgSysGlobals.checkXMLNoTail(element, element.tag)
                if not self._compulsoryAttributes and not self._optionalAttributes:
                    BibleOrgSysGlobals.checkXMLNoAttributes(
                        element, element.tag)
                if not self._compulsoryElements and not self._optionalElements:
                    BibleOrgSysGlobals.checkXMLNoSubelements(
                        element, element.tag)

                # Check compulsory attributes on this main element
                for attributeName in self._compulsoryAttributes:
                    attributeValue = element.get(attributeName)
                    if attributeValue is None:
                        logging.error(
                            _("Compulsory {!r} attribute is missing from {} element in record {}"
                              ).format(attributeName, element.tag, j))
                    if not attributeValue:
                        logging.warning(
                            _("Compulsory {!r} attribute is blank on {} element in record {}"
                              ).format(attributeName, element.tag, j))

                # Check optional attributes on this main element
                for attributeName in self._optionalAttributes:
                    attributeValue = element.get(attributeName)
                    if attributeValue is not None:
                        if not attributeValue:
                            logging.warning(
                                _("Optional {!r} attribute is blank on {} element in record {}"
                                  ).format(attributeName, element.tag, j))

                # Check for unexpected additional attributes on this main element
                for attributeName in element.keys():
                    attributeValue = element.get(attributeName)
                    if attributeName not in self._compulsoryAttributes and attributeName not in self._optionalAttributes:
                        logging.warning(
                            _("Additional {!r} attribute ({!r}) found on {} element in record {}"
                              ).format(attributeName, attributeValue,
                                       element.tag, j))

                # Check the attributes that must contain unique information (in that particular field -- doesn't check across different attributes)
                for attributeName in self._uniqueAttributes:
                    attributeValue = element.get(attributeName)
                    if attributeValue is not None:
                        if attributeValue in uniqueDict["Attribute_" +
                                                        attributeName]:
                            logging.error(
                                _("Found {!r} data repeated in {!r} field on {} element in record {}"
                                  ).format(attributeValue, attributeName,
                                           element.tag, j))
                        uniqueDict["Attribute_" +
                                   attributeName].append(attributeValue)

                # Get the sourceComponent to use as a record ID
                #   (tolerate its absence so the compulsory-element check
                #    below can report it instead of crashing here)
                IDElement = element.find("sourceComponent")
                ID = None if IDElement is None else IDElement.text

                # Check compulsory elements
                for elementName in self._compulsoryElements:
                    foundElement = element.find(elementName)
                    if foundElement is None:
                        logging.error(
                            _("Compulsory {!r} element is missing in record with ID {!r} (record {})"
                              ).format(elementName, ID, j))
                    else:
                        BibleOrgSysGlobals.checkXMLNoTail(
                            foundElement,
                            foundElement.tag + " in " + element.tag)
                        BibleOrgSysGlobals.checkXMLNoAttributes(
                            foundElement,
                            foundElement.tag + " in " + element.tag)
                        # Subelements are not checked here (compulsory elements may contain them)
                        if not foundElement.text:
                            logging.warning(
                                _("Compulsory {!r} element is blank in record with ID {!r} (record {})"
                                  ).format(elementName, ID, j))

                # Check optional elements
                for elementName in self._optionalElements:
                    foundElement = element.find(elementName)
                    if foundElement is not None:
                        BibleOrgSysGlobals.checkXMLNoTail(
                            foundElement,
                            foundElement.tag + " in " + element.tag)
                        BibleOrgSysGlobals.checkXMLNoAttributes(
                            foundElement,
                            foundElement.tag + " in " + element.tag)
                        BibleOrgSysGlobals.checkXMLNoSubelements(
                            foundElement,
                            foundElement.tag + " in " + element.tag)
                        if not foundElement.text:
                            logging.warning(
                                _("Optional {!r} element is blank in record with ID {!r} (record {})"
                                  ).format(elementName, ID, j))

                # Check for unexpected additional elements
                for subelement in element:
                    if subelement.tag not in self._compulsoryElements and subelement.tag not in self._optionalElements:
                        logging.warning(
                            _("Additional {!r} element ({!r}) found in record with ID {!r} (record {})"
                              ).format(subelement.tag, subelement.text, ID, j))

                # Check the elements that must contain unique information (in that particular element -- doesn't check across different elements)
                for elementName in self._uniqueElements:
                    uniqueElement = element.find(elementName)
                    if uniqueElement is not None:
                        text = uniqueElement.text
                        if text in uniqueDict["Element_" + elementName]:
                            logging.error(
                                _("Found {!r} data repeated in {!r} element in record with ID {!r} (record {})"
                                  ).format(text, elementName, ID, j))
                        uniqueDict["Element_" + elementName].append(text)
            else:
                logging.warning(
                    _("Unexpected element: {} in record {}").format(
                        element.tag, j))
            if element.tail is not None and element.tail.strip():
                logging.error(
                    _("Unexpected {!r} tail data after {} element in record {}"
                      ).format(element.tail, element.tag, j))
        if self._XMLTree.tail is not None and self._XMLTree.tail.strip():
            logging.error(
                _("Unexpected {!r} tail data after {} element").format(
                    self._XMLTree.tail, self._XMLTree.tag))

    # end of BibleReferencesLinksConverter.__validate

    def __str__(self) -> str:
        """
        This method returns the string representation of a Bible book code.

        @return: the name of a Bible object formatted as a string
        @rtype: string
        """
        indent = 2
        result = "BibleReferencesLinksConverter object"
        if self.titleString:
            result += ('\n' if result else
                       '') + ' ' * indent + _("Title: {}").format(
                           self.titleString)
        if self.PROGRAM_VERSION:
            result += ('\n' if result else
                       '') + ' ' * indent + _("Version: {}").format(
                           self.PROGRAM_VERSION)
        if self.dateString:
            result += ('\n' if result else ''
                       ) + ' ' * indent + _("Date: {}").format(self.dateString)
        if self._XMLTree is not None:
            result += ('\n' if result else '') + ' ' * indent + _(
                "Number of entries = {:,}").format(len(self._XMLTree))
        return result

    # end of BibleReferencesLinksConverter.__str__

    def __len__(self):
        """
        Returns the number of references links loaded.
        """
        return len(self._XMLTree)

    # end of BibleReferencesLinksConverter.__len__

    def importDataToPython(self):
        """
        Loads (and pivots) the data (not including the header) into suitable Python containers to use in a Python program.
        (Of course, you can just use the elementTree in self._XMLTree if you prefer.)

        Works in three passes:
            1/ read the raw text fields out of every element of the tree,
            2/ parse every source and target reference into verse-key objects,
            3/ build a dictionary keyed by individual verse, holding both the
                forward links and the reversed (target -> source) links.

        The results are cached, so a second call returns the stored containers.

        @return: a 2-tuple of (list, dict) where
            the list holds (sourceReference, sourceComponent, parsedSourceReference, actualLinksList) tuples
                and each actualLinksList entry is (targetReference, targetComponent, parsedTargetReference, linkType),
            the dict maps each verse key to a list of tuples of that same four-field shape.
        """
        def makeList(parameter1, parameter2):
            """
            Returns a list containing all parameters. Parameter1 may already be a list
                (in which case parameter2 is appended to it in place).

            NOTE(review): this helper is not called anywhere in the visible body of this method.
            """
            if isinstance(parameter1, list):
                #assert parameter2 not in parameter1
                parameter1.append(parameter2)
                return parameter1
            else:
                return [parameter1, parameter2]

        # end of makeList

        assert self._XMLTree
        if self.__DataList:  # We've already done an import/restructuring -- no need to repeat it
            return self.__DataList, self.__DataDict

        # Pass 1: collect the raw (unparsed) text fields from each element
        rawRefLinkList = []
        actualLinkCount = 0
        for element in self._XMLTree:
            #vPrint( 'Quiet', debuggingThisModule, BibleOrgSysGlobals.elementStr( element ) )

            # Get these first for helpful error messages
            sourceReference = element.find('sourceReference').text
            sourceComponent = element.find('sourceComponent').text
            assert sourceComponent in (
                'Section',
                'Verses',
                'Verse',
            )

            BibleOrgSysGlobals.checkXMLNoText(element, sourceReference, 'kls1')
            BibleOrgSysGlobals.checkXMLNoAttributes(element, sourceReference,
                                                    'kd21')
            BibleOrgSysGlobals.checkXMLNoTail(element, sourceReference, 'so20')

            actualRawLinksList = []
            for subelement in element:
                #vPrint( 'Quiet', debuggingThisModule, BibleOrgSysGlobals.elementStr( subelement ) )
                if subelement.tag in (
                        'sourceReference',
                        'sourceComponent',
                ):  # already processed these
                    BibleOrgSysGlobals.checkXMLNoAttributes(
                        subelement, sourceReference, 'ls12')
                    BibleOrgSysGlobals.checkXMLNoSubelements(
                        subelement, sourceReference, 'ks02')
                    BibleOrgSysGlobals.checkXMLNoTail(subelement,
                                                      sourceReference, 'sqw1')

                elif subelement.tag == 'BibleReferenceLink':
                    BibleOrgSysGlobals.checkXMLNoText(subelement,
                                                      sourceReference, 'haw9')
                    BibleOrgSysGlobals.checkXMLNoAttributes(
                        subelement, sourceReference, 'hs19')
                    BibleOrgSysGlobals.checkXMLNoTail(subelement,
                                                      sourceReference, 'jsd9')

                    targetReference = subelement.find('targetReference').text
                    targetComponent = subelement.find('targetComponent').text
                    assert targetComponent in (
                        'Section',
                        'Verses',
                        'Verse',
                    )
                    linkType = subelement.find('linkType').text
                    assert linkType in (
                        'TSK',
                        'QuotedOTReference',
                        'AlludedOTReference',
                        'PossibleOTReference',
                    )

                    actualRawLinksList.append((
                        targetReference,
                        targetComponent,
                        linkType,
                    ))
                    actualLinkCount += 1
                # Subelements with any other tag are silently skipped here

            rawRefLinkList.append((
                sourceReference,
                sourceComponent,
                actualRawLinksList,
            ))

        vPrint(
            'Normal', debuggingThisModule,
            f"  {len(rawRefLinkList):,} raw links loaded (with {actualLinkCount:,} actual raw link entries)"
        )

        # Pass 2: parse the source and target references
        myRefLinkList = []
        actualLinkCount = 0
        # NOTE(review): BOS is not referenced again in this method -- confirm whether the construction is needed for a side effect
        BOS = BibleOrganisationalSystem('GENERIC-KJV-66-ENG')

        for j, (sourceReference, sourceComponent,
                actualRawLinksList) in enumerate(rawRefLinkList):
            # Just do some testing first
            #   A 'Verse' must parse as a SimpleVerseKey; anything else is expected
            #   to raise TypeError, and it is treated as an error if it does not.
            if sourceComponent == 'Verse':
                x = SimpleVerseKey(sourceReference)
            else:
                flag = False
                try:
                    x = SimpleVerseKey(sourceReference, ignoreParseErrors=True)
                    flag = True
                except TypeError:
                    pass  # This should happen coz it should fail the SVK
                if flag:
                    logging.error("{} {!r} failed!".format(
                        sourceComponent, sourceReference))
                    raise TypeError
            # Now do the actual parsing
            parsedSourceReference = FlexibleVersesKey(sourceReference)
            if BibleOrgSysGlobals.debugFlag and debuggingThisModule:
                vPrint('Quiet', debuggingThisModule, j, sourceComponent,
                       sourceReference, parsedSourceReference)
                #assert parsedSourceReference.getShortText().replace(' ','_') == sourceReference
            actualLinksList = []
            for k, (targetReference, targetComponent,
                    linkType) in enumerate(actualRawLinksList):
                # Just do some testing first (same check as for the source reference above)
                if targetComponent == 'Verse':
                    x = SimpleVerseKey(targetReference)
                else:
                    flag = False
                    try:
                        x = SimpleVerseKey(targetReference,
                                           ignoreParseErrors=True)
                        flag = True
                    except TypeError:
                        pass  # This should happen coz it should fail the SVK
                    if flag:
                        logging.error("{} {!r} failed!".format(
                            targetComponent, targetReference))
                        raise TypeError
                # Now do the actual parsing
                #   Unlike the source reference, a target that fails to parse is
                #   logged and kept with a parsed value of None.
                try:
                    parsedTargetReference = FlexibleVersesKey(targetReference)
                except TypeError:
                    logging.error(
                        "  Temporarily ignored {!r} (TypeError from FlexibleVersesKey)"
                        .format(targetReference))
                    parsedTargetReference = None
                if BibleOrgSysGlobals.debugFlag and debuggingThisModule:
                    vPrint('Quiet', debuggingThisModule, ' ', targetComponent,
                           targetReference, parsedTargetReference)
                    #assert parsedTargetReference.getShortText().replace(' ','_',1) == targetReference

                actualLinksList.append((
                    targetReference,
                    targetComponent,
                    parsedTargetReference,
                    linkType,
                ))
                actualLinkCount += 1

            myRefLinkList.append((
                sourceReference,
                sourceComponent,
                parsedSourceReference,
                actualLinksList,
            ))

        vPrint(
            'Normal', debuggingThisModule,
            "  {:,} links processed (with {:,} actual link entries)".format(
                len(rawRefLinkList), actualLinkCount))
        #vPrint( 'Quiet', debuggingThisModule, myRefLinkList ); halt
        self.__DataList = myRefLinkList

        # Pass 3: now put it into my dictionaries for easy access
        # This part should be customized or added to for however you need to process the data

        # Create a link dictionary (by verse key)
        #   Every verse included in a source reference gets the whole record appended
        myRefLinkDict = {}
        for sourceReference, sourceComponent, parsedSourceReference, actualLinksList in myRefLinkList:
            #vPrint( 'Quiet', debuggingThisModule, sourceReference, sourceComponent, parsedSourceReference )
            #vPrint( 'Quiet', debuggingThisModule, sourceReference, sourceComponent, parsedSourceReference, actualLinksList )
            for verseRef in parsedSourceReference.getIncludedVerses():
                #vPrint( 'Quiet', debuggingThisModule, verseRef )
                assert isinstance(verseRef, SimpleVerseKey)
                if verseRef not in myRefLinkDict: myRefLinkDict[verseRef] = []
                myRefLinkDict[verseRef].append((
                    sourceReference,
                    sourceComponent,
                    parsedSourceReference,
                    actualLinksList,
                ))
            #vPrint( 'Quiet', debuggingThisModule, myRefLinkDict ); halt
        # Number of distinct verse keys so far (not the number of appended records)
        originalLinks = len(myRefLinkDict)
        vPrint(
            'Quiet', debuggingThisModule,
            "  {:,} verse links added to dictionary (includes filling out spans)"
            .format(originalLinks))
        #vPrint( 'Quiet', debuggingThisModule, myRefLinkDict ); halt

        # Create a reversed link dictionary (by verse key)
        #   Entries go into the same dictionary: each verse of a parsed target gets a
        #   record pointing back at the source, with a single-entry links list
        for sourceReference, sourceComponent, parsedSourceReference, actualLinksList in myRefLinkList:
            #vPrint( 'Quiet', debuggingThisModule, sourceReference, sourceComponent, parsedSourceReference )
            #vPrint( 'Quiet', debuggingThisModule, sourceReference, sourceComponent, parsedSourceReference, actualLinksList )
            for targetReference, targetComponent, parsedTargetReference, linkType in actualLinksList:
                if parsedTargetReference is not None:
                    for verseRef in parsedTargetReference.getIncludedVerses():
                        #vPrint( 'Quiet', debuggingThisModule, verseRef )
                        assert isinstance(verseRef, SimpleVerseKey)
                        if linkType == 'TSK': reverseLinkType = 'TSKQuoted'
                        elif linkType == 'QuotedOTReference':
                            reverseLinkType = 'OTReferenceQuoted'
                        elif linkType == 'AlludedOTReference':
                            reverseLinkType = 'OTReferenceAlluded'
                        elif linkType == 'PossibleOTReference':
                            reverseLinkType = 'OTReferencePossible'
                        else:
                            # NOTE(review): 'halt' is not defined in the visible code -- presumably a deliberate NameError to stop on an unknown linkType; confirm
                            halt  # Have a new linkType!
                        if verseRef not in myRefLinkDict:
                            myRefLinkDict[verseRef] = []
                        myRefLinkDict[verseRef].append(
                            (targetReference, targetComponent,
                             parsedTargetReference, [
                                 (sourceReference, sourceComponent,
                                  parsedSourceReference, reverseLinkType)
                             ]))
            #vPrint( 'Quiet', debuggingThisModule, myRefLinkDict ); halt
        # Both figures count dictionary keys, so 'reverseLinks' is the number of
        #   verse keys that only appeared through a reversed link
        totalLinks = len(myRefLinkDict)
        reverseLinks = totalLinks - originalLinks
        vPrint(
            'Quiet', debuggingThisModule,
            "  {:,} reverse links added to dictionary to give {:,} total".
            format(reverseLinks, totalLinks))
        #vPrint( 'Quiet', debuggingThisModule, myRefLinkDict ); halt

        self.__DataDict = myRefLinkDict

        # Let's find the most number of references for a verse
        # NOTE(review): mostVerseRef is only bound once some entry has more than zero
        #   references, so an empty dictionary would make the vPrint below fail
        mostReferences = totalReferences = 0
        for verseRef, entryList in self.__DataDict.items():
            numRefs = len(entryList)
            if numRefs > mostReferences:
                mostReferences, mostVerseRef = numRefs, verseRef
            totalReferences += numRefs
        vPrint(
            'Quiet', debuggingThisModule,
            "  {:,} maximum links for any one reference ({})".format(
                mostReferences, mostVerseRef.getShortText()))
        vPrint('Quiet', debuggingThisModule,
               "  {:,} total links for all references".format(totalReferences))

        return self.__DataList, self.__DataDict

    # end of BibleReferencesLinksConverter.importDataToPython

    def pickle(self, filepath=None):
        """
        Writes the information tables to a .pickle file that can be easily loaded into a Python3 program.

        Two objects are pickled one after the other into the same file:
            first the data list, then the data dictionary
            (so the reader must call pickle.load twice).

        If no filepath is given, the file is written to the default writeable
            derived-datafiles folder (which is created if necessary)
            as <filenameBase>_Tables.pickle.
        """
        import pickle

        assert self._XMLTree
        self.importDataToPython()
        assert self.__DataList
        assert self.__DataDict

        if not filepath:
            folder = BibleOrgSysGlobals.DEFAULT_WRITEABLE_DERIVED_DATAFILES_FOLDERPATH
            # makedirs with exist_ok avoids the check-then-create race and
            #   also copes with missing parent folders
            os.makedirs(folder, exist_ok=True)
            filepath = os.path.join(folder,
                                    self._filenameBase + '_Tables.pickle')
        vPrint('Normal', debuggingThisModule,
               _("Exporting to {}…").format(filepath))
        with open(filepath, 'wb') as myFile:
            pickle.dump(self.__DataList, myFile)
            pickle.dump(self.__DataDict, myFile)

    # end of BibleReferencesLinksConverter.pickle

    def exportDataWithIndex(self, filepath=None):
        """
        Writes the data dictionary to a pair of pickle files: a data file and an index file.

        The data file holds the pickled reference list of each verse key,
            written back to back.
        The index file holds one pickled dictionary mapping each verse key to
            the (filePosition, length) of its entry in the data file,
            so that a single entry can be read without loading everything.

        If no filepath is given, the files are written to the default writeable
            derived-datafiles folder (which is created if necessary) as
            <filenameBase>_Tables.index.pickle and <filenameBase>_Tables.data.pickle.
        If a filepath is given, '.index' and '.data' are inserted before its
            extension (or '.pickle' is used if it has none),
            e.g., 'out/links.pickle' gives 'out/links.index.pickle'
            and 'out/links.data.pickle'.
        """
        import pickle

        assert self._XMLTree
        self.importDataToPython()
        assert self.__DataList
        assert self.__DataDict

        if filepath:
            # Derive both output names from the caller's path, so that the
            #   two filepaths are always defined
            stem, extension = os.path.splitext(filepath)
            if not extension: extension = '.pickle'
            indexFilepath = stem + '.index' + extension
            dataFilepath = stem + '.data' + extension
        else:
            folder = BibleOrgSysGlobals.DEFAULT_WRITEABLE_DERIVED_DATAFILES_FOLDERPATH
            # makedirs with exist_ok avoids the check-then-create race and
            #   also copes with missing parent folders
            os.makedirs(folder, exist_ok=True)
            indexFilepath = os.path.join(
                folder, self._filenameBase + '_Tables.index.pickle')
            dataFilepath = os.path.join(
                folder, self._filenameBase + '_Tables.data.pickle')
        vPrint('Normal', debuggingThisModule,
               _("Exporting to {}…").format(dataFilepath))
        index = {}
        filePosition = 0
        with open(dataFilepath, 'wb') as myFile:
            for vKey, refList in self.__DataDict.items():
                # write() returns the number of bytes written for this entry
                length = myFile.write(pickle.dumps(refList))
                assert vKey not in index
                index[vKey] = (filePosition, length)
                filePosition += length
        with open(indexFilepath, 'wb') as myFile:
            pickle.dump(index, myFile)

    # end of BibleReferencesLinksConverter.exportDataWithIndex

    def exportDataToPython(self, filepath=None):
        """
        Writes the information tables to a .py file that can be cut and pasted into a Python program.

        NOTE: This export is unfinished. After importing the data it prints
            "Export to Python not written yet!" and then evaluates the bare name
            `halt`, so the file-writing code below that point is not reached
            unless `halt` is defined somewhere outside the visible code.

        filepath is the (optional) output path; if it is not given, a
            '_Tables.py' file in the default derived-datafiles folder is used.
        """
        def exportPythonDictOrList(theFile, theDictOrList, dictName,
                                   keyComment, fieldsComment):
            """
            Exports theDictOrList to theFile.

            Currently always raises Exception("Not written yet") after the initial assert.
            """
            assert theDictOrList
            raise Exception("Not written yet")
            # Everything below in this helper is unreachable because of the raise above.
            # NOTE(review): it refers to `theDict`, but the parameter is named
            #   `theDictOrList` -- needs fixing before the raise can be removed.
            for dictKey in theDict.keys():  # Have to iterate this :(
                fieldsCount = len(theDict[dictKey])
                break  # We only check the first (random) entry we get
            theFile.write(
                "{} = {{\n  # Key is {}\n  # Fields ({}) are: {}\n".format(
                    dictName, keyComment, fieldsCount, fieldsComment))
            for dictKey in sorted(theDict.keys()):
                theFile.write('  {}: {},\n'.format(repr(dictKey),
                                                   repr(theDict[dictKey])))
            theFile.write("}}\n# end of {} ({} entries)\n\n".format(
                dictName, len(theDict)))

        # end of exportPythonDictOrList

        # Make sure the XML is loaded and has been converted to Python containers
        assert self._XMLTree
        self.importDataToPython()
        assert self.__DataList
        assert self.__DataDict

        vPrint('Quiet', debuggingThisModule,
               "Export to Python not written yet!")
        # NOTE(review): `halt` is not defined in the visible code; if it is not
        #   defined elsewhere either, this line raises NameError -- presumably a
        #   deliberate way of stopping here. Confirm before relying on the code below.
        halt

        # Work out the default output filepath if none was given
        if not filepath:
            folder = BibleOrgSysGlobals.DEFAULT_WRITEABLE_DERIVED_DATAFILES_FOLDERPATH
            if not os.path.exists(folder): os.mkdir(folder)
            filepath = os.path.join(folder, self._filenameBase + '_Tables.py')
        vPrint('Normal', debuggingThisModule,
               _("Exporting to {}…").format(filepath))
        with open(filepath, 'wt', encoding='utf-8') as myFile:
            # Write the header comments of the generated file
            myFile.write("# {}\n#\n".format(filepath))
            myFile.write(
                "# This UTF-8 file was automatically generated by BibleReferencesLinks.py V{} on {}\n#\n"
                .format(PROGRAM_VERSION, datetime.now()))
            if self.titleString:
                myFile.write("# {} data\n".format(self.titleString))
            # NOTE(review): both a bare PROGRAM_VERSION (above) and self.PROGRAM_VERSION
            #   are used -- confirm that both exist.
            if self.PROGRAM_VERSION:
                myFile.write("#  Version: {}\n".format(self.PROGRAM_VERSION))
            if self.dateString:
                myFile.write("#  Date: {}\n#\n".format(self.dateString))
            myFile.write(
                "#   {} {} loaded from the original XML file.\n#\n\n".format(
                    len(self._XMLTree), self._treeTag))
            mostEntries = "0=referenceNumber (integer 1..255), 1=sourceComponent/BBB (3-uppercase characters)"
            # Maps each table name to a (key comment, fields comment) pair for the helper
            dictInfo = {
                "referenceNumberDict":
                ("referenceNumber (integer 1..255)", "specified"),
                "sourceComponentDict": ("sourceComponent", "specified"),
                "sequenceList":
                ("sourceComponent/BBB (3-uppercase characters)", ""),
                "initialAllAbbreviationsDict":
                ("allAbbreviations", mostEntries)
            }
            # NOTE(review): .items() implies a dict-like object despite the 'List' name -- confirm
            for dictName, dictData in self.__DataList.items():
                exportPythonDictOrList(myFile, dictData, dictName,
                                       dictInfo[dictName][0],
                                       dictInfo[dictName][1])
            myFile.write("# end of {}".format(os.path.basename(filepath)))

    # end of BibleReferencesLinksConverter.exportDataToPython

    def exportDataToJSON(self, filepath=None):
        """
        Writes the information tables to a .json file.

        See http://en.wikipedia.org/wiki/JSON.

        filepath is the (optional) output path; if it is not given, a
            '_Tables.json' file in the default derived-datafiles folder is used.

        NOTE: Several JSON documents are written one after the other into the
            same file: each item individually (temporary debugging output)
            followed by the complete container.
        """
        import json

        # Make sure the XML is loaded and has been converted to Python containers
        assert self._XMLTree
        self.importDataToPython()
        assert self.__DataList
        assert self.__DataDict

        if not filepath:
            outputFolder = BibleOrgSysGlobals.DEFAULT_WRITEABLE_DERIVED_DATAFILES_FOLDERPATH
            if not os.path.exists(outputFolder):
                os.mkdir(outputFolder)
            filepath = os.path.join(outputFolder,
                                    self._filenameBase + '_Tables.json')
        vPrint('Normal', debuggingThisModule,
               _("Exporting to {}…").format(filepath))

        with open(filepath, 'wt', encoding='utf-8') as jsonFile:

            def announceAndDump(label, payload):
                """Prints what is about to be dumped, then dumps it (temp for debugging)."""
                vPrint('Quiet', debuggingThisModule, label, payload)
                json.dump(payload, jsonFile, indent=2)

            # First the list: every entry on its own, then the whole list
            for listEntry in self.__DataList:
                announceAndDump("Dumping something", listEntry)
            json.dump(self.__DataList, jsonFile, indent=2)

            # Then the dict: every key and value on its own, then the whole dict
            for entryKey, entryValue in self.__DataDict.items():
                announceAndDump("Dumping someKey", entryKey)
                announceAndDump("Dumping someItem", entryValue)
            json.dump(self.__DataDict, jsonFile, indent=2)

    # end of BibleReferencesLinksConverter.exportDataToJSON

    def exportDataToC(self, filepath=None):
        """
        Writes the information tables to a .h and .c files that can be included in c and c++ programs.

        NOTE: The (optional) filepath should not have the file extension specified -- this is added automatically.

        NOTE: This export is unfinished. After importing the data it prints
            "Export to C not written yet!" and then evaluates the bare name
            `halt`, so the file-writing code below that point is not reached
            unless `halt` is defined somewhere outside the visible code.
        """
        def exportPythonDict(hFile, cFile, theDict, dictName, sortedBy,
                             structure):
            """
            Exports theDict to the .h and .c files.

            A typedef built from the ';'-separated declarations in structure is
                written to hFile, and a table with one line per (sorted) key of
                theDict is written to cFile.
            """
            def convertEntry(entry):
                """
                Convert special characters in an entry…

                Returns the entry as a string: a str is returned unchanged, while the
                    fields of a tuple, or the values of a dict (in sorted key order),
                    are joined with ", " -- None becomes "", strings are double-quoted
                    (with embedded double quotes escaped) and ints are written as-is.
                List fields raise an Exception; other unknown types are logged as errors.
                """
                result = ""
                if isinstance(entry, str):
                    result = entry
                elif isinstance(entry, tuple):
                    for field in entry:
                        if result: result += ", "  # Separate the fields
                        if field is None: result += '""'
                        elif isinstance(field, str):
                            result += '"' + str(field).replace('"',
                                                               '\\"') + '"'
                        elif isinstance(field, int):
                            result += str(field)
                        elif isinstance(field, list):
                            raise Exception("Not written yet (list1)")
                        else:
                            logging.error(
                                _("Cannot convert unknown field type {!r} in tuple entry {!r}"
                                  ).format(field, entry))
                elif isinstance(entry, dict):
                    # Same conversion as for tuples, taking the values in sorted key order
                    for key in sorted(entry.keys()):
                        field = entry[key]
                        if result: result += ", "  # Separate the fields
                        if field is None: result += '""'
                        elif isinstance(field, str):
                            result += '"' + str(field).replace('"',
                                                               '\\"') + '"'
                        elif isinstance(field, int):
                            result += str(field)
                        elif isinstance(field, list):
                            raise Exception("Not written yet (list2)")
                        else:
                            logging.error(
                                _("Cannot convert unknown field type {!r} in dict entry {!r}"
                                  ).format(field, entry))
                else:
                    logging.error(
                        _("Can't handle this type of entry yet: {}").format(
                            repr(entry)))
                return result

            # end of convertEntry

            # NOTE(review): fieldsCount stays unassigned if theDict is empty
            for dictKey in theDict.keys():  # Have to iterate this :(
                fieldsCount = len(
                    theDict[dictKey]
                ) + 1  # Add one since we include the key in the count
                break  # We only check the first (random) entry we get

            # Write the struct typedef to the header file, one declaration per line
            #hFile.write( "typedef struct {}EntryStruct { {} } {}Entry;\n\n".format( dictName, structure, dictName ) )
            hFile.write("typedef struct {}EntryStruct {{\n".format(dictName))
            for declaration in structure.split(';'):
                adjDeclaration = declaration.strip()
                if adjDeclaration:
                    hFile.write("    {};\n".format(adjDeclaration))
            hFile.write("}} {}Entry;\n\n".format(dictName))

            # Write the table itself to the .c file, one initialiser line per sorted key
            cFile.write(
                "const static {}Entry\n {}[{}] = {{\n  // Fields ({}) are {}\n  // Sorted by {}\n"
                .format(dictName, dictName, len(theDict), fieldsCount,
                        structure, sortedBy))
            for dictKey in sorted(theDict.keys()):
                if isinstance(dictKey, str):
                    cFile.write("  {{\"{}\", {}}},\n".format(
                        dictKey, convertEntry(theDict[dictKey])))
                elif isinstance(dictKey, int):
                    cFile.write("  {{{}, {}}},\n".format(
                        dictKey, convertEntry(theDict[dictKey])))
                else:
                    logging.error(
                        _("Can't handle this type of key data yet: {}").format(
                            dictKey))
            # NOTE(review): this emits "]};" although the table was opened with "{" -- looks
            #   like it should be "};" -- confirm when this export is finished
            cFile.write("]}}; // {} ({} entries)\n\n".format(
                dictName, len(theDict)))

        # end of exportPythonDict

        # Make sure the XML is loaded and has been converted to Python containers
        assert self._XMLTree
        self.importDataToPython()
        assert self.__DataList

        vPrint('Quiet', debuggingThisModule, "Export to C not written yet!")
        # NOTE(review): `halt` is not defined in the visible code; if it is not
        #   defined elsewhere either, this line raises NameError -- presumably a
        #   deliberate way of stopping here. Confirm before relying on the code below.
        halt

        # Work out the default output filepath (without extension) if none was given
        if not filepath:
            folder = BibleOrgSysGlobals.DEFAULT_WRITEABLE_DERIVED_DATAFILES_FOLDERPATH
            if not os.path.exists(folder): os.mkdir(folder)
            filepath = os.path.join(folder, self._filenameBase + '_Tables')
        hFilepath = filepath + '.h'
        cFilepath = filepath + '.c'
        vPrint('Normal', debuggingThisModule,
               _("Exporting to {}…").format(
                   cFilepath))  # Don't bother telling them about the .h file
        # Name used for the include guard in the header file
        ifdefName = self._filenameBase.upper() + "_Tables_h"

        with open( hFilepath, 'wt', encoding='utf-8' ) as myHFile, \
             open( cFilepath, 'wt', encoding='utf-8' ) as myCFile:
            # Write matching header comments to both files
            myHFile.write("// {}\n//\n".format(hFilepath))
            myCFile.write("// {}\n//\n".format(cFilepath))
            lines = "// This UTF-8 file was automatically generated by BibleReferencesLinks.py V{} on {}\n//\n".format(
                PROGRAM_VERSION, datetime.now())
            myHFile.write(lines)
            myCFile.write(lines)
            if self.titleString:
                lines = "// {} data\n".format(self.titleString)
                myHFile.write(lines)
                myCFile.write(lines)
            if self.PROGRAM_VERSION:
                lines = "//  Version: {}\n".format(self.PROGRAM_VERSION)
                myHFile.write(lines)
                myCFile.write(lines)
            if self.dateString:
                lines = "//  Date: {}\n//\n".format(self.dateString)
                myHFile.write(lines)
                myCFile.write(lines)
            myCFile.write(
                "//   {} {} loaded from the original XML file.\n//\n\n".format(
                    len(self._XMLTree), self._treeTag))
            # Open the include guard in the .h file and include the .h file from the .c file
            myHFile.write("\n#ifndef {}\n#define {}\n\n".format(
                ifdefName, ifdefName))
            myCFile.write('#include "{}"\n\n'.format(
                os.path.basename(hFilepath)))

            # C type prefixes substituted into the struct declarations below
            CHAR = "const unsigned char"
            BYTE = "const int"
            # Maps each table name to a (sorted-by description, struct declarations) pair
            # NOTE(review): the "sequenceList" tuple has only one item, so the
            #   dictInfo[dictName][1] lookup below would raise IndexError for it
            dictInfo = {
                "referenceNumberDict":
                ("referenceNumber (integer 1..255)",
                 "{} referenceNumber; {}* ByzantineAbbreviation; {}* CCELNumberString; {}* NETBibleAbbreviation; {}* OSISAbbreviation; {} USFMAbbreviation[3+1]; {} USFMNumberString[2+1]; {}* SBLAbbreviation; {}* SwordAbbreviation; {}* sourceReference; {}* numExpectedChapters; {}* possibleAlternativeBooks; {} sourceComponent[3+1];"
                 .format(BYTE, CHAR, CHAR, CHAR, CHAR, CHAR, CHAR, CHAR, CHAR,
                         CHAR, CHAR, CHAR, CHAR)),
                "sourceComponentDict":
                ("sourceComponent",
                 "{} sourceComponent[3+1]; {}* ByzantineAbbreviation; {}* CCELNumberString; {} referenceNumber; {}* NETBibleAbbreviation; {}* OSISAbbreviation; {} USFMAbbreviation[3+1]; {} USFMNumberString[2+1]; {}* SBLAbbreviation; {}* SwordAbbreviation; {}* sourceReference; {}* numExpectedChapters; {}* possibleAlternativeBooks;"
                 .format(CHAR, CHAR, CHAR, BYTE, CHAR, CHAR, CHAR, CHAR, CHAR,
                         CHAR, CHAR, CHAR, CHAR)),
                "sequenceList": ("sequenceList", ),
                "CCELDict":
                ("CCELNumberString",
                 "{}* CCELNumberString; {} referenceNumber; {} sourceComponent[3+1];"
                 .format(CHAR, BYTE, CHAR)),
                "initialAllAbbreviationsDict":
                ("abbreviation",
                 "{}* abbreviation; {} sourceComponent[3+1];".format(
                     CHAR, CHAR))
            }

            # NOTE(review): .items() implies a dict-like object despite the 'List' name -- confirm
            for dictName, dictData in self.__DataList.items():
                exportPythonDict(myHFile, myCFile, dictData, dictName,
                                 dictInfo[dictName][0], dictInfo[dictName][1])

            # Close the include guard and finish both files
            myHFile.write("#endif // {}\n\n".format(ifdefName))
            myHFile.write("// end of {}".format(os.path.basename(hFilepath)))
            myCFile.write("// end of {}".format(os.path.basename(cFilepath)))

Exemplo n.º 5

0

Exibir arquivo

Arquivo: BibleOrganizationalSystemsConverter.py Projeto: alerque/BibleOrgSys

class BibleOrganizationalSystemsConverter:
    """
    Class for handling and converting BibleOrganizationalSystems.
    """

    def __init__( self ):
        """
        Constructor: takes no parameters.
        Sets up the XML tag names, the validation rule tuples and the (still
            empty) result fields, then loads the supporting data tables.
        self._XMLtree is left as None here -- it is filled by loadAndValidate().
        """
        # Base name (without extension) used when building filepaths
        self._filenameBase = "BibleOrganizationalSystems"

        # Tag names used when parsing the XML
        self._treeTag = "BibleOrganizationalSystems"
        self._headerTag = "header"
        self._mainElementTag = "BibleOrganizationalSystem"

        # Rules about attributes (used for automatically checking/validating the XML)
        self._compulsoryAttributes = ( "type", )
        self._optionalAttributes = ()
        self._uniqueAttributes = ()

        # Rules about sub-elements (used for automatically checking/validating the XML)
        self._compulsoryElements = ( "referenceAbbreviation", "languageCode", )
        self._optionalElements = (
            "name", "completionDate", "publicationDate", "copyright",
            "versificationSystem", "punctuationSystem", "bookOrderSystem", "booksNamesSystem",
            "translator", "publisher", "derivedFrom", "usesText", "includesBooks", "url", "comment",
            )
        self._uniqueElements = ()
        self._allowedMultiple = ( "name", "translator", "derivedFrom", "usesText", "url", "comment", )

        # Placeholders that get filled in later
        self.title = None
        self.version = None
        self.date = None
        self.header = None
        self._XMLtree = None
        self.__dataDicts = None

        # Load the data tables needed for proper checking
        self._ISOLanguages = ISO_639_3_Languages().loadData()
        self._BibleBookOrderSystems = BibleBookOrderSystems().loadData()
        self._BiblePunctuationSystems = BiblePunctuationSystems().loadData()
        self._BibleVersificationSystems = BibleVersificationSystems().loadData()
        self._BibleBooksNamesSystems = BibleBooksNamesSystems().loadData()
    # end of BibleOrganizationalSystemsConverter.__init__


    def __str__( self ):
        """
        This method returns the string representation of a Bible book code.

        @return: the name of a Bible object formatted as a string
        @rtype: string
        """
        result = ""
        if self.title: result += ('\n' if result else '') + self.title
        if self.version: result += ('\n' if result else '') + "  Version: {}".format( self.version )
        if self.date: result += ('\n' if result else '') + "  Date: {}".format( self.date )
        result += ('\n' if result else '') + "  Number of entries = {}".format( len(self._XMLtree) )
        return result
    # end of BibleOrganizationalSystemsConverter.__str__


    def __len__( self ):
        """ Returns the number of items loaded. """
        return len( self._XMLtree )
    # end of BibleOrganizationalSystemsConverter.__len__


    def loadAndValidate( self, XMLFilepath=None ):
        """
        Loads (and crudely validates the XML file) into an element tree.
            Allows the filepath of the source XML file to be specified, otherwise uses the default.
        """
        if self._XMLtree is None: # We mustn't have already have loaded the data
            if XMLFilepath is None:
                XMLFilepath = os.path.join( os.path.dirname(__file__), "DataFiles", self._filenameBase + ".xml" ) # Relative to module, not cwd

            self._load( XMLFilepath )
            if BibleOrgSysGlobals.strictCheckingFlag:
                self._validate()
        return self
    # end of BibleOrganizationalSystemsConverter.loadAndValidate


    def _load( self, XMLFilepath ):
        """
        Load the source XML file and remove the header from the tree.
        Also, extracts some useful elements from the header element.

        XMLFilepath is the path of the XML file to parse (must not be empty).

        On return, self._XMLtree holds the root element (without the header
            element if one was found as the first child), self.header holds the
            removed header, and self.version, self.date and self.title are
            filled from the header's single "work" element.
        Structural problems (wrong root tag, missing header, etc.) are logged, not raised.
        """
        assert( XMLFilepath )
        self.__XMLFilepath = XMLFilepath
        assert( self._XMLtree is None or len(self._XMLtree)==0 ) # Make sure we're not doing this twice

        if BibleOrgSysGlobals.verbosityLevel > 2: print( _("Loading BibleOrganisationalSystems XML file from {!r}...").format( self.__XMLFilepath ) )
        self._XMLtree = ElementTree().parse( self.__XMLFilepath )
        # Fail here if we didn't load anything at all
        #   (Count the children explicitly: testing the truth value of an Element is deprecated)
        assert( len(self._XMLtree) > 0 )

        if self._XMLtree.tag != self._treeTag:
            logging.error( _("Expected to load {!r} but got {!r}").format( self._treeTag, self._XMLtree.tag ) )
            return

        header = self._XMLtree[0] # The header is expected to be the first child
        if header.tag != self._headerTag:
            logging.warning( _("Missing header element (looking for {!r} tag)").format( self._headerTag ) )
            return

        # Detach the header so that only the main elements remain in the tree
        self.header = header
        self._XMLtree.remove( header )
        if len(header)>1:
            logging.info( _("Unexpected elements in header") )
        elif len(header)==0:
            logging.info( _("Missing work element in header") )
        else:
            work = header[0]
            if work.tag == "work":
                self.version = work.find("version").text
                self.date = work.find("date").text
                self.title = work.find("title").text
            else:
                logging.warning( _("Missing work element in header") )
    # end of BibleOrganizationalSystemsConverter._load


    def _validate( self ):
        """
        Check/validate the loaded data.
        """
        assert( self._XMLtree )

        uniqueDict = {}
        for elementName in self._uniqueElements: uniqueDict["Element_"+elementName] = []
        for attributeName in self._uniqueAttributes: uniqueDict["Attribute_"+attributeName] = []

        expectedID = 1
        for j,element in enumerate(self._XMLtree):
            if element.tag == self._mainElementTag:
                # Check compulsory attributes on this main element
                for attributeName in self._compulsoryAttributes:
                    attributeValue = element.get( attributeName )
                    if attributeValue is None:
                        logging.error( _("Compulsory {!r} attribute is missing from {} element in record {}").format( attributeName, element.tag, j ) )
                    if not attributeValue:
                        logging.warning( _("Compulsory {!r} attribute is blank on {} element in record {}").format( attributeName, element.tag, j ) )

                # Check optional attributes on this main element
                for attributeName in self._optionalAttributes:
                    attributeValue = element.get( attributeName )
                    if attributeValue is not None:
                        if not attributeValue:
                            logging.warning( _("Optional {!r} attribute is blank on {} element in record {}").format( attributeName, element.tag, j ) )

                # Check for unexpected additional attributes on this main element
                for attributeName in element.keys():
                    attributeValue = element.get( attributeName )
                    if attributeName not in self._compulsoryAttributes and attributeName not in self._optionalAttributes:
                        logging.warning( _("Additional {!r} attribute ({!r}) found on {} element in record {}").format( attributeName, attributeValue, element.tag, j ) )

                # Check the attributes that must contain unique information (in that particular field -- doesn't check across different attributes)
                for attributeName in self._uniqueAttributes:
                    attributeValue = element.get( attributeName )
                    if attributeValue is not None:
                        if attributeValue in uniqueDict["Attribute_"+attributeName]:
                            logging.error( _("Found {!r} data repeated in {!r} field on {} element in record {}").format( attributeValue, attributeName, element.tag, j ) )
                        uniqueDict["Attribute_"+attributeName].append( attributeValue )

                ID = element.find("referenceAbbreviation").text

                # Check compulsory elements
                for elementName in self._compulsoryElements:
                    if element.find( elementName ) is None:
                        logging.error( _("Compulsory {!r} element is missing in record with ID {!r} (record {})").format( elementName, ID, j ) )
                    elif not element.find( elementName ).text:
                        logging.warning( _("Compulsory {!r} element is blank in record with ID {!r} (record {})").format( elementName, ID, j ) )

                # Check optional elements
                for elementName in self._optionalElements:
                    if element.find( elementName ) is not None:
                        if not element.find( elementName ).text:
                            logging.warning( _("Optional {!r} element is blank in record with ID {!r} (record {})").format( elementName, ID, j ) )

                # Check for unexpected additional elements
                for subelement in element:
                    if subelement.tag not in self._compulsoryElements and subelement.tag not in self._optionalElements:
                        logging.warning( _("Additional {!r} element ({!r}) found in record with ID {!r} (record {})").format( subelement.tag, subelement.text, ID, j ) )

                # Check the elements that must contain unique information (in that particular element -- doesn't check across different elements)
                for elementName in self._uniqueElements:
                    if element.find( elementName ) is not None:
                        text = element.find( elementName ).text
                        if text in uniqueDict["Element_"+elementName]:
                            logging.error( _("Found {!r} data repeated in {!r} element in record with ID {!r} (record {})").format( text, elementName, ID, j ) )
                        uniqueDict["Element_"+elementName].append( text )

                # Special checks of particular fields
                if element.find("includesBooks") is not None:
                    bookList = element.find("includesBooks").text.split()
                    for BBB in bookList:
                        if not BibleOrgSysGlobals.BibleBooksCodes.isValidReferenceAbbreviation( BBB ):
                            logging.critical( _("Unrecognized {!r} Bible book code found in 'includesBooks' in record with ID {!r} (record {})").format( BBB, ID, j) )
                        if bookList.count( BBB ) > 1:
                            logging.error( _("Multiple {!r} Bible book codes found in 'includesBooks' in record with ID {!r} (record {})").format( BBB, ID, j) )

            else:
                logging.warning( _("Unexpected element: {} in record {}").format( element.tag, j ) )
    # end of BibleOrganizationalSystemsConverter._validate


    def importDataToPython( self ):
        """
        Loads (and pivots) the data (not including the header) into suitable Python containers to use in a Python program.
        (Of course, you can just use the elementTree in self._XMLtree if you prefer.)
        """
        assert( self._XMLtree )
        if self.__dataDicts: # We've already done an import/restructuring -- no need to repeat it
            return self.__dataDicts

        # We'll create a number of dictionaries with different elements as the key
        dataDict, indexDict, combinedIndexDict = {}, {}, {}
        for element in self._XMLtree:
            bits = {}
            # Get the required information out of the tree for this element
            # Start with the compulsory elements and type attribute
            referenceAbbreviation = element.find('referenceAbbreviation').text
            bits['referenceAbbreviation'] = referenceAbbreviation
            myType = element.get( 'type' )
            bits['type'] = myType
            if myType not in allowedTypes: logging.error( _("Unrecognized {!r} type for {!r} (expected one of {})").format(myType,referenceAbbreviation,allowedTypes) )
            languageCode = element.find('languageCode').text
            if self._ISOLanguages and not self._ISOLanguages.isValidLanguageCode( languageCode ): # Check that we have a valid language code
                if languageCode != '???':
                    logging.error( "Unrecognized {!r} ISO-639-3 language code in {!r} organisational system".format( languageCode, referenceAbbreviation ) )
            bits['languageCode'] = languageCode

            # Now work on the optional elements
            for name in ( 'name', 'publicationDate', 'versificationSystem', 'punctuationSystem', 'bookOrderSystem', 'booksNamesSystem', 'derivedFrom', 'usesText', 'includesBooks' ):
                for nameData in element.findall(name):
                    if name in self._allowedMultiple: # Put multiple entries into a list
                        if name not in bits: bits[name] = [nameData.text]
                        else: bits[name].append( nameData.text )
                    else: # Not allowed multiples
                        if name in bits: logging.error( _("Unexpected multiple {} elements found in {} {}").format(name, referenceAbbreviation, myType) )
                        if name=='includesBooks': # special handling
                            bits['includesBooks'] = nameData.text.split()
                            for BBB in bits['includesBooks']:
                                if not BibleOrgSysGlobals.BibleBooksCodes.isValidReferenceAbbreviation( BBB ):
                                    logging.error( _("Unrecognized {!r} Bible book code found in 'includesBooks' in {} {}").format( BBB, referenceAbbreviation, myType) )
                        else: bits[name] = nameData.text # normal handling

            extension = '_' + myType
            extendedRA = referenceAbbreviation if referenceAbbreviation.endswith(extension) else (referenceAbbreviation + extension)
            dataDict[extendedRA] = bits
            if referenceAbbreviation in indexDict: indexDict[referenceAbbreviation].append( extendedRA )
            else: indexDict[referenceAbbreviation] = [extendedRA]
            if referenceAbbreviation in combinedIndexDict: combinedIndexDict[referenceAbbreviation].append( extendedRA )
            else: combinedIndexDict[referenceAbbreviation] = [extendedRA]
            if extendedRA != referenceAbbreviation:
                #assert( extendedRA not in combinedIndexDict )
                if extendedRA in combinedIndexDict: logging.error( _("Found {} in combinedIndexDict").format( extendedRA ) )
                combinedIndexDict[extendedRA] = [extendedRA]
        assert( len(indexDict) <= len(dataDict) )
        assert( len(combinedIndexDict) >= len(indexDict) )

        if BibleOrgSysGlobals.strictCheckingFlag: # We'll do quite a bit more cross-checking now
            for extendedReferenceAbbreviation,data in dataDict.items():
                #print( extendedReferenceAbbreviation, data )
                systemType = data['type']
                if systemType=='edition':
                    if 'derivedFrom' in data: logging.error( _("{} shouldn't use 'derivedFrom' {!r}").format( extendedReferenceAbbreviation, data['derivedFrom'] ) )
                    if 'usesText' not in data: logging.error( _("{} doesn't specify 'usesText'").format( extendedReferenceAbbreviation ) )
                    else: # have a 'usesText' list
                        for textAbbrev in data['usesText']:
                            if textAbbrev not in indexDict: logging.error( _("{} specifies unknown {!r} text in 'usesText' field").format(extendedReferenceAbbreviation,textAbbrev) )
                            elif len(indexDict[textAbbrev]) > 1: # it could be ambiguous
                                found = 0
                                for thisType in ('revision','translation','original'): # but not 'edition'
                                    usesTextExtended = textAbbrev + '_' + thisType
                                    if usesTextExtended in dataDict:
                                        foundOne = usesTextExtended
                                        found += 1
                                assert( found > 0 )
                                if found==1: # ah, it's not actually ambiguous
                                    if BibleOrgSysGlobals.verbosityLevel > 2: print( _("Adjusted text used for {} from the ambiguous {!r} to the extended name {!r}").format( extendedReferenceAbbreviation, textAbbrev, foundOne ) )
                                    data['usesText'].remove( textAbbrev)
                                    data['usesText'].append( foundOne )
                                else: logging.warning( _("{} specifies ambiguous {!r} (could be {}) texts in 'usesText' field").format(extendedReferenceAbbreviation,textAbbrev,indexDict[textAbbrev]) )
                elif systemType=='revision':
                    if 'derivedFrom' not in data: logging.error( _("{} doesn't specify 'derivedFrom'").format( extendedReferenceAbbreviation ) )
                    else:
                        for df in data['derivedFrom']:
                            if df not in indexDict: logging.error( _("{} specifies unknown {!r} text in 'derivedFrom' field").format(extendedReferenceAbbreviation,df) )
                            elif len(indexDict[df]) > 1: logging.warning( _("{} specifies ambiguous {!r} (could be {}) texts in 'derivedFrom' field").format(extendedReferenceAbbreviation,df,indexDict[df]) )
                elif systemType=='translation':
                    if 'derivedFrom' not in data: logging.warning( _("{} doesn't specify 'derivedFrom'").format( extendedReferenceAbbreviation ) )
                    else:
                        for df in data['derivedFrom']:
                            if df not in indexDict: logging.error( _("{} specifies unknown {!r} text in 'derivedFrom' field").format(extendedReferenceAbbreviation,df) )
                            elif len(indexDict[df]) > 1: logging.warning( _("{} specifies ambiguous {!r} (could be {}) texts in 'derivedFrom' field").format(extendedReferenceAbbreviation,df,indexDict[df]) )
                elif systemType=='original':
                    if 'derivedFrom' in data: logging.error( _("{} shouldn't use 'derivedFrom' {!r}").format( extendedReferenceAbbreviation, data['derivedFrom'] ) )
                if 'versificationSystem' in data and data['versificationSystem'] not in ('None', 'Unknown'):
                    if not self._BibleVersificationSystems.isValidVersificationSystemName( data['versificationSystem'] ):
                        extra = "\n  Available systems are {}".format( self._BibleVersificationSystems.getAvailableVersificationSystemNames()) if BibleOrgSysGlobals.verbosityLevel > 2 else ''
                        logging.error( _("Unknown {!r} versification system name in {}{}").format(data['versificationSystem'],extendedReferenceAbbreviation,extra) )
                if 'punctuationSystem' in data and data['punctuationSystem'] not in ('None', 'Unknown'):
                    if not self._BiblePunctuationSystems.isValidPunctuationSystemName( data['punctuationSystem'] ):
                        extra = "\n  Available systems are {}".format( self._BiblePunctuationSystems.getAvailablePunctuationSystemNames()) if BibleOrgSysGlobals.verbosityLevel > 2 else ''
                        logging.error( _("Unknown {!r} punctuation system name in {}{}").format(data['punctuationSystem'],extendedReferenceAbbreviation,extra) )

        self.__dataDicts = dataDict, indexDict, combinedIndexDict
        return self.__dataDicts
    # end of importDataToPython


    def pickle( self, filepath=None ):
        """
        Writes the information tables to a .pickle file that can be easily loaded into a Python3 program.

        If no filepath is given, the file is written into a "DerivedFiles" subfolder
            (created if necessary) beside the source XML file
            and is named from the filename base plus "_Tables.pickle".
        """
        import pickle

        assert( self._XMLtree )
        self.importDataToPython()
        assert( self.__dataDicts )

        if not filepath:
            folder = os.path.join( os.path.split(self.__XMLFilepath)[0], "DerivedFiles/" )
            # makedirs(exist_ok=True) avoids the window between an exists() check and mkdir()
            #   in which another process could create the folder and make mkdir() fail
            os.makedirs( folder, exist_ok=True )
            filepath = os.path.join( folder, self._filenameBase + "_Tables.pickle" )
        if BibleOrgSysGlobals.verbosityLevel > 1: print( _("Exporting to {}...").format( filepath ) )
        with open( filepath, 'wb' ) as myFile:
            pickle.dump( self.__dataDicts, myFile )
    # end of pickle


    def exportDataToPython( self, filepath=None ):
        """
        Writes the information tables to a .py file that can be cut and pasted into a Python program.

        If no filepath is given, the file is written into a "DerivedFiles" subfolder
            (created if necessary) beside the source XML file
            and is named from the filename base plus "_Tables.py".
        The file is explicitly written as UTF-8 (which is what its generated header says it is)
            rather than in the platform's default encoding.
        """
        def exportPythonDict( theFile, theDict, dictName, keyComment, fieldsComment ):
            """Writes theDict to theFile as 'dictName = {...}' with one entry per line in sorted key order."""
            theFile.write( "{} = {{\n  # Key is {}\n  # Fields are: {}\n".format( dictName, keyComment, fieldsComment ) )
            for dictKey in sorted( theDict ):
                theFile.write( '  {!r}: {},\n'.format( dictKey, theDict[dictKey] ) )
            theFile.write( "}}\n# end of {}\n\n".format( dictName ) )
        # end of exportPythonDict


        assert( self._XMLtree )
        # Keep the result of this call so that the import doesn't have to be requested a second time
        dataDict, indexDict, combinedIndexDict = self.importDataToPython()
        assert( self.__dataDicts )

        if not filepath:
            folder = os.path.join( os.path.split(self.__XMLFilepath)[0], "DerivedFiles" )
            os.makedirs( folder, exist_ok=True ) # Otherwise the open() below fails on a fresh checkout
            filepath = os.path.join( folder, self._filenameBase + "_Tables.py" )
        if BibleOrgSysGlobals.verbosityLevel > 1: print( _("Exporting to {}...").format( filepath ) )

        with open( filepath, 'wt', encoding='utf-8' ) as myFile:
            myFile.write( "# {}\n#\n".format( filepath ) )
            myFile.write( "# This UTF-8 file was automatically generated by BibleOrganizationalSystemsConverter.py V{} on {}\n#\n".format( ProgVersion, datetime.now() ) )
            if self.title: myFile.write( "# {}\n".format( self.title ) )
            if self.version: myFile.write( "#  Version: {}\n".format( self.version ) )
            if self.date: myFile.write( "#  Date: {}\n#\n".format( self.date ) )
            myFile.write( "#   {} {} entries loaded from the original XML file.\n".format( len(self._XMLtree), self._treeTag ) )
            # NOTE(review): the key/field descriptions below look like they were copied from a
            #   different exporter -- confirm that they really describe these three dicts
            exportPythonDict( myFile, dataDict, "dataDict", "extendedReferenceAbbreviation", "referenceAbbreviation, SBLAbbreviation, OSISAbbreviation, ParatextAbbreviation, ParatextNumberString, nameEnglish (comment only)" )
            exportPythonDict( myFile, indexDict, "indexDict", "referenceAbbreviation", "id, SBLAbbreviation, OSISAbbreviation, ParatextAbbreviation, ParatextNumberString, nameEnglish (comment only)" )
            exportPythonDict( myFile, combinedIndexDict, "combinedIndexDict", "referenceAbbreviation", "id, SBLAbbreviation, OSISAbbreviation, ParatextAbbreviation, ParatextNumberString, nameEnglish (comment only)" )
    # end of exportDataToPython


    def exportDataToJSON( self, filepath=None ):
        """
        Writes the information tables to a .json file that can be easily loaded into a Java program.

        See http://en.wikipedia.org/wiki/JSON.

        If no filepath is given, the file is written into a "DerivedFiles" subfolder
            (created if necessary) beside the source XML file
            and is named from the filename base plus "_Tables.json".
        """
        import json

        assert( self._XMLtree )
        self.importDataToPython()
        assert( self.__dataDicts )

        if not filepath:
            folder = os.path.join( os.path.split(self.__XMLFilepath)[0], "DerivedFiles" )
            os.makedirs( folder, exist_ok=True ) # Otherwise the open() below fails on a fresh checkout
            filepath = os.path.join( folder, self._filenameBase + "_Tables.json" )
        if BibleOrgSysGlobals.verbosityLevel > 1: print( _("Exporting to {}...").format( filepath ) )
        with open( filepath, 'wt', encoding='utf-8' ) as myFile:
            # No title/version/date header is written here (unlike the other exporters)
            #   because JSON has no comment syntax
            json.dump( self.__dataDicts, myFile, indent=2 )
    # end of exportDataToJSON


    def exportDataToC( self, filepath=None ):
        """
        Writes the information tables to a .h file that can be included in c and c++ programs.

        NOT IMPLEMENTED YET: this always raises NotImplementedError (a subclass of Exception)
            with the message "C export not written yet".
        Everything after the raise statement is an unreachable draft which is kept for future work.
        """
        raise NotImplementedError( "C export not written yet" )

        # ---- Unreachable draft below this line ----
        def exportPythonDict( theFile, theDict, dictName, structName, fieldsComment ):
            """Exports theDict to theFile as a static array of C structs."""
            def convertEntry( entry ):
                """Convert the fields of an entry into a comma-separated list of C literals."""
                result = ""
                for field in entry:
                    if result: result += ", " # Separate the fields
                    if field is None: result += '""'
                    elif isinstance( field, str): result += '"' + str(field).replace('"','\\"') + '"'
                    elif isinstance( field, int): result += str(field)
                    else: logging.error( _("Cannot convert unknown field type {!r} in entry {!r}").format( field, entry ) )
                return result

            # Literal braces have to be doubled inside str.format() templates
            theFile.write( "static struct {} {}[] = {{\n  // Fields are {}\n".format( structName, dictName, fieldsComment ) )
            for entry in sorted(theDict.keys()):
                if isinstance( entry, str ):
                    theFile.write( "  {{\"{}\", {}}},\n".format( entry, convertEntry(theDict[entry]) ) )
                elif isinstance( entry, int ):
                    theFile.write( "  {{{}, {}}},\n".format( entry, convertEntry(theDict[entry]) ) )
                else:
                    logging.error( _("Can't handle this type of data yet: {}").format( entry ) )
            theFile.write( "}}; // {}\n\n".format( dictName) )
        # end of exportPythonDict


        assert( self._XMLtree )
        self.importDataToPython()
        assert( self.__dataDicts )

        if not filepath: filepath = os.path.join( os.path.split(self.__XMLFilepath)[0], "DerivedFiles", self._filenameBase + "_Tables.h" )
        if BibleOrgSysGlobals.verbosityLevel > 1: print( _("Exporting to {}...").format( filepath ) )

        # NOTE(review): importDataToPython() stores and returns a 3-tuple (dataDict, indexDict, combinedIndexDict)
        #   so this 6-way unpacking (and the six exports below) must be reworked before the raise is removed
        IDDict, RADict, SBLDict, OADict, PADict, PNDict = self.importDataToPython()
        ifdefName = self._filenameBase.upper() + "_Tables_h"
        with open( filepath, 'wt', encoding='utf-8' ) as myFile:
            myFile.write( "// {}\n//\n".format( filepath ) )
            myFile.write( "// This UTF-8 file was automatically generated by BibleOrganizationalSystemsConverter.py V{} on {}\n//\n".format( ProgVersion, datetime.now() ) )
            if self.title: myFile.write( "// {}\n".format( self.title ) )
            if self.version: myFile.write( "//  Version: {}\n".format( self.version ) )
            if self.date: myFile.write( "//  Date: {}\n//\n".format( self.date ) )
            myFile.write( "//   {} {} loaded from the original XML file.\n//\n\n".format( len(self._XMLtree), self._treeTag ) )
            myFile.write( "#ifndef {}\n#define {}\n\n".format( ifdefName, ifdefName ) )
            exportPythonDict( myFile, IDDict, "IDDict", "{int id; char* refAbbrev; char* SBLAbbrev; char* OSISAbbrev; char* PTAbbrev; char* PTNum; char* EngName;}", "id (sorted), referenceAbbreviation, SBLAbbreviation, OSISAbbreviation, ParatextAbbreviation, ParatextNumberString, nameEnglish (comment only)" )
            exportPythonDict( myFile, RADict, "RADict", "{char* refAbbrev; int id; char* SBLAbbrev; char* OSISAbbrev; char* PTAbbrev; char* PTNum; char* EngName;}", "referenceAbbreviation (sorted), SBLAbbreviation, OSISAbbreviation, ParatextAbbreviation, ParatextNumberString, id, nameEnglish (comment only)" )
            exportPythonDict( myFile, SBLDict, "SBLDict", "{char* SBLAbbrev; int id; char* refAbbrev; char* OSISAbbrev; char* PTAbbrev; char* PTNum; char* EngName;}", "SBLAbbreviation (sorted), ReferenceAbbreviation, OSISAbbreviation, ParatextAbbreviation, ParatextNumberString, id, nameEnglish (comment only)" )
            exportPythonDict( myFile, OADict, "OADict", "{char* OSISAbbrev; int id; char* refAbbrev; char* SBLAbbrev; char* PTAbbrev; char* PTNum; char* EngName;}", "OSISAbbreviation (sorted), ReferenceAbbreviation, SBLAbbreviation, ParatextAbbreviation, ParatextNumberString, id, nameEnglish (comment only)" )
            exportPythonDict( myFile, PADict, "PADict", "{char* PTAbbrev; int id; char* refAbbrev; char* SBLAbbrev; char* OSISAbbrev; char* PTNum; char* EngName;}", "ParatextAbbreviation (sorted), referenceAbbreviation, SBLAbbreviation, OSISAbbreviation, ParatextNumberString, id, nameEnglish (comment only)" )
            exportPythonDict( myFile, PNDict, "PNDict", "{char* PTNum; int id; char* PTAbbrev; char* refAbbrev; char* SBLAbbrev; char* OSISAbbrev; char* EngName;}", "ParatextNumberString (sorted), ParatextAbbreviation, referenceAbbreviation, SBLAbbreviation, OSISAbbreviation, id, nameEnglish (comment only)" )
            myFile.write( "#endif // {}\n".format( ifdefName ) )

Exemplo n.º 6

0

Exibir arquivo

Arquivo: ZefaniaXMLBible.py Projeto: dimleyk/BibleOrgSys

class ZefaniaXMLBible(Bible):
    """
    Class for reading, validating, and converting ZefaniaXMLBible XML.
    """
    XMLNameSpace = "{http://www.w3.org/2001/XMLSchema-instance}"
    treeTag = 'XMLBIBLE'
    infoTag = 'INFORMATION'
    bookTag = 'BIBLEBOOK'
    chapterTag = 'CHAPTER'
    captionTag = 'CAPTION'
    verseTag = 'VERS'
    noteTag = 'NOTE'
    styleTag = 'STYLE'
    breakTag = 'BR'

    def __init__(self, sourceFolder, givenName, encoding='utf-8'):
        """
        Constructor: prepares an (as yet unloaded) Zefania Bible object.

        The XML file is expected at sourceFolder/givenName.
        Nothing is parsed here -- the tree and header are filled in by load().
        """
        # The base class must be initialised before we add anything of our own
        Bible.__init__(self)
        self.objectTypeString = "Zefania"
        self.objectNameString = "Zefania XML Bible object"

        # Remember where our data comes from
        self.sourceFolder = sourceFolder
        self.givenName = givenName
        self.encoding = encoding
        self.sourceFilepath = os.path.join(self.sourceFolder, self.givenName)

        # Placeholders for the XML data
        self.tree = None
        self.header = None

        # Get the data tables that we need for proper checking
        self.genericBOS = BibleOrganizationalSystem("GENERIC-KJV-66-ENG")

        # Preliminary readability check: a problem is only reported, construction carries on
        fileIsReadable = os.access(self.sourceFilepath, os.R_OK)
        if not fileIsReadable:
            print("ZefaniaXMLBible: File '{}' is unreadable".format(self.sourceFilepath))

        # The given name is used until load() finds a better one in the file
        self.name = self.givenName

    # end of ZefaniaXMLBible.__init__

    def load(self):
        """
        Load a single source XML file and load book elements.

        The recognised attributes of the root element are copied onto this object,
            the INFORMATION (header) record is detached from the tree
            (it's looked for as the first child and then as the last child)
            and every other child is expected to be a book element.
        Structural problems are logged rather than raised, and an empty root element
            is reported with logging.error instead of causing an IndexError.
        doPostLoadProcessing() is always called at the end.
        """
        if Globals.verbosityLevel > 2:
            print(_("Loading {}...").format(self.sourceFilepath))
        self.tree = ElementTree().parse(self.sourceFilepath)
        if Globals.debugFlag:
            assert (len(self.tree))  # Fail here if we didn't load anything at all

        # Find the main (bible) container
        if self.tree.tag == ZefaniaXMLBible.treeTag:
            location = "Zefania XML file"
            Globals.checkXMLNoText(self.tree, location, '4f6h')
            Globals.checkXMLNoTail(self.tree, location, '1wk8')

            # These attributes are recognised (so they're not warned about) but are not stored
            unusedAttributes = (
                ZefaniaXMLBible.XMLNameSpace + 'noNamespaceSchemaLocation',
                "lgid",  # In italian.xml this is set to "german"
                "type",
            )
            name = status = revision = version = None
            for attrib, value in self.tree.items():
                if attrib == "biblename":
                    name = value
                elif attrib == "status":
                    status = value
                elif attrib == "revision":
                    revision = value
                elif attrib == "version":
                    version = value
                elif attrib not in unusedAttributes:
                    logging.warning(
                        "Unprocessed '{}' attribute ({}) in main element".
                        format(attrib, value))
            if name: self.name = name
            if status: self.status = status
            if revision: self.revision = revision
            if version: self.version = version

            if len(self.tree):
                # The information record is normally first, but can be at the END of the file
                for candidate in (self.tree[0], self.tree[-1]):
                    if candidate.tag == ZefaniaXMLBible.infoTag:
                        self.header = candidate
                        self.tree.remove(self.header)
                        self.__validateAndExtractHeader()
                        break
            else:  # Indexing an empty element would raise an IndexError
                logging.error("No elements found inside '{}' in {}".format(
                    ZefaniaXMLBible.treeTag, self.sourceFilepath))

            # Find the submain (book) containers
            for element in self.tree:
                if element.tag == ZefaniaXMLBible.bookTag:
                    sublocation = "book in " + location
                    Globals.checkXMLNoText(element, sublocation, 'g3g5')
                    Globals.checkXMLNoTail(element, sublocation, 'd3f6')
                    self.__validateAndExtractBook(element)
                else:
                    logging.error("Expected to find '{}' but got '{}'".format(
                        ZefaniaXMLBible.bookTag, element.tag))
        else:
            logging.error("Expected to load '{}' but got '{}'".format(
                ZefaniaXMLBible.treeTag, self.tree.tag))
        self.doPostLoadProcessing()

    # end of ZefaniaXMLBible.load

    def __validateAndExtractHeader(self):
        """
        Validates the header record and copies its fields onto this object.

        A header record looks something like:
            <INFORMATION>
            <title>King James Version</title>
            <creator></creator>
            <subject>The Holy Bible</subject>
            <description>In 1604, King James I of England authorized that a new translation ...</description>
            <publisher>FREE BIBLE SOFTWARE GROUP</publisher>
            <contributors />
            <date>2009-01-23</date>
            <type>Bible</type>
            <format>Zefania XML Bible Markup Language</format>
            <identifier>kjv</identifier>
            <source>http://www.unboundbible.com/zips/index.cfm?lang=English</source>
            <language>ENG</language>
            <coverage>provide the Bible to the nations of the world</coverage>
            <rights>We believe that this Bible is found in the Public Domain.</rights>
        </INFORMATION>

        Every recognised subelement gets the same three checks (no tail, no attributes, no subelements).
        title, description, date, identifier, source and language are always stored
            (and are asserted to contain text when the debug flag is set).
        creator, subject, publisher, contributors, type, coverage and rights
            are only stored if they contain text (type is stored as documentType).
        format is only checked (when the debug flag is set) and is never stored.
        Any other tag is logged as an error.
        """
        if Globals.debugFlag: assert (self.header)
        location = 'Header'
        Globals.checkXMLNoAttributes(self.header, location, 'j4j6')
        Globals.checkXMLNoText(self.header, location, 'sk4l')
        Globals.checkXMLNoTail(self.header, location, 'a2d4')

        # TODO: We probably need to rationalise some of the self.xxx stores
        # Each table maps the XML tag to the name of the attribute that its text is stored in
        alwaysStored = {
            'title': 'title',
            'description': 'description',
            'date': 'date',
            'identifier': 'identifier',
            'source': 'source',
            'language': 'language',
        }
        storedIfPresent = {
            'creator': 'creator',
            'subject': 'subject',
            'publisher': 'publisher',
            'contributors': 'contributors',
            'type': 'documentType',
            'coverage': 'coverage',
            'rights': 'rights',
        }
        formatTag = 'format'

        for element in self.header:
            tag = element.tag
            if tag != formatTag and tag not in alwaysStored and tag not in storedIfPresent:
                logging.error("Found unexpected '{}' tag in {}".format(
                    tag, location))
                continue

            # All of the recognised fields are simple text-only elements
            sublocation = "{} in {}".format(tag, location)
            Globals.checkXMLNoTail(element, sublocation, 'al1d')
            Globals.checkXMLNoAttributes(element, sublocation, 'j3jd')
            Globals.checkXMLNoSubelements(element, sublocation, '5g78')

            text = element.text
            if tag in storedIfPresent:
                if text: setattr(self, storedIfPresent[tag], text)
                continue

            # What's left are the fields that are expected to always contain text
            if Globals.debugFlag: assert (text)
            if tag == formatTag:
                if Globals.debugFlag:
                    assert (text == 'Zefania XML Bible Markup Language')
            else:
                setattr(self, alwaysStored[tag], text)

    # end of ZefaniaXMLBible.__validateAndExtractHeader

    def __validateAndExtractBook(self, book):
        """
        Validates the given XML book record and extracts its chapters.

        The book is identified by its bnumber attribute if it has one
            (a failed lookup is logged and the book is ignored),
            or else by its bname attribute.
        If the book can be identified, its chapter subelements are processed
            into a new BibleBook which is then saved.
        """

        if Globals.verbosityLevel > 3: print(_("Validating XML book..."))

        # Gather the attributes that we understand (complaining about any others)
        found = {"bnumber": None, "bname": None, "bsname": None}
        for attrib, value in book.items():
            if attrib in found:
                found[attrib] = value
            else:
                logging.warning(
                    "Unprocessed '{}' attribute ({}) in book element".format(
                        attrib, value))
        bookNumber = found["bnumber"]
        bookName = found["bname"]
        bookShortName = found["bsname"]

        # Work out which book this is
        BBB = None
        if bookNumber:
            try:
                BBB = Globals.BibleBooksCodes.getBBBFromReferenceNumber(
                    bookNumber)
            except KeyError:
                logging.warning( "Unable to deduce which book is number={}, name={}, shortName={} -- ignoring it" \
                                                                        .format( bookNumber, bookName, bookShortName ) )
        elif bookName:
            BBB = self.genericBOS.getBBB(bookName)

        if not BBB:
            return  # We can't do anything with a book that we can't identify

        if Globals.verbosityLevel > 2:
            print(_("Validating {} {}...").format(BBB, bookName))
        thisBook = BibleBook(self.name, BBB)
        thisBook.objectNameString = "Zefania XML Bible Book object"
        thisBook.objectTypeString = "Zefania"
        for element in book:
            if element.tag != ZefaniaXMLBible.chapterTag:
                logging.error("Expected to find '{}' but got '{}'".format(
                    ZefaniaXMLBible.chapterTag, element.tag))
                continue
            sublocation = "chapter in {}".format(BBB)
            Globals.checkXMLNoText(element, sublocation, 'j3jd')
            Globals.checkXMLNoTail(element, sublocation, 'al1d')
            self.__validateAndExtractChapter(BBB, thisBook, element)
        if Globals.verbosityLevel > 2:
            print("  Saving {} into results...".format(BBB))
        self.saveBook(thisBook)

    # end of ZefaniaXMLBible.__validateAndExtractBook

    def __validateAndExtractChapter(self, BBB, thisBook, chapter):
        """
        Check/validate and extract chapter data from the given XML chapter record
            finding and saving the chapter number and
            finding and saving verse (and caption) elements.

        BBB is the book code (used in the messages),
        thisBook is the book object which the 'c' and 'v' lines are appended to,
        chapter is the XML chapter element.
        """

        if Globals.verbosityLevel > 3: print(_("Validating XML chapter..."))

        # Process the chapter attributes first
        chapterNumber = None
        for attrib, value in chapter.items():
            if attrib == "cnumber":
                chapterNumber = value
            else:
                logging.warning(
                    "Unprocessed '{}' attribute ({}) in chapter element".
                    format(attrib, value))
        if chapterNumber:
            thisBook.appendLine('c', chapterNumber)
        else:
            # NB: The contents of the chapter are still processed below
            logging.error(
                "Missing 'cnumber' attribute in chapter element for {}".format(BBB))

        for element in chapter:
            if element.tag == ZefaniaXMLBible.verseTag:
                self.__validateAndExtractVerse(BBB, chapterNumber, thisBook,
                                               element)
            elif element.tag == ZefaniaXMLBible.captionTag:  # Used in Psalms
                location = "caption in {} {}".format(BBB, chapterNumber)
                Globals.checkXMLNoTail(element, location, 'k5k8')
                Globals.checkXMLNoSubelements(element, location, 'd3f5')
                # Handle caption attributes
                vRef = None
                for attrib, value in element.items():
                    if attrib == "vref":
                        vRef = value
                        if Globals.debugFlag: assert (vRef == '1')
                    else:
                        logging.warning(
                            "Unprocessed '{}' attribute ({}) in caption element"
                            .format(attrib, value))
                if Globals.debugFlag: assert (vRef)
                vText = element.text
                if vText:  # This is the main text of the caption
                    thisBook.appendLine('v', '0' + ' ' +
                                        vText)  # We save it as verse zero
                else:
                    logging.warning("{} {}:{} has no text".format(
                        BBB, chapterNumber, vRef))
            else:
                logging.error("Expected to find '{}' but got '{}'".format(
                    ZefaniaXMLBible.verseTag, element.tag))

    # end of ZefaniaXMLBible.__validateAndExtractChapter

    def __validateAndExtractVerse(self, BBB, chapterNumber, thisBook, verse):
        """
        Check/validate and extract chapter data from the given XML book record
            finding and saving chapter numbers and
            finding and saving verse elements.
        """

        if Globals.verbosityLevel > 3: print(_("Validating XML verse..."))

        location = "verse in {} {}".format(BBB, chapterNumber)
        Globals.checkXMLNoTail(verse, location, 'l5ks')

        # Handle verse attributes
        verseNumber = toVerseNumber = None
        for attrib, value in verse.items():
            if attrib == "vnumber":
                verseNumber = value
            else:
                logging.warning(
                    "Unprocessed '{}' attribute ({}) in verse element".format(
                        attrib, value))
        if Globals.debugFlag: assert (verseNumber)
        location = "{}:{}".format(
            location, verseNumber)  # Get a better location description
        #thisBook.appendLine( 'v', verseNumber )
        vText = verse.text
        if vText: vText = vText.strip()
        #if not vText: # This happens if a verse starts immediately with a style or note
        #logging.warning( "{} {}:{} has no text".format( BBB, chapterNumber, verseNumber ) )

        # Handle verse subelements (notes and styled portions)
        for subelement in verse:
            if subelement.tag == ZefaniaXMLBible.noteTag:
                sublocation = "note in " + location
                noteType = None
                for attrib, value in subelement.items():
                    if attrib == "type":
                        noteType = value
                    else:
                        logging.warning(
                            "Unprocessed '{}' attribute ({}) in style subelement"
                            .format(attrib, value))
                if noteType not in (
                        'n-studynote',
                        'x-studynote',
                ):
                    logging.warning("Unexpected {} note type in {}".format(
                        noteType, BBB))
                if Globals.debugFlag: assert (noteType)
                nText, nTail = subelement.text, subelement.tail
                #print( "note", BBB, chapterNumber, verseNumber, noteType, repr(nText), repr(nTail) )
                #thisBook.appendLine( 'ST', css ) # XXXXXXXXXXXXXXXXXXXXXXXXXX Losing data here (for now)
                #thisBook.appendLine( 'ST=', nText )
                if nTail:
                    if '\n' in nTail:
                        print(
                            "ZefaniaXMLBible.__validateAndExtractVerse: nTail {} {}:{} '{}'"
                            .format(BBB, chapterNumber, verseNumber, nTail))
                        nTail = nTail.replace('\n', ' ')
                    thisBook.appendLine('v~', nTail)
                for subsubelement in subelement:
                    if subsubelement.tag == ZefaniaXMLBible.styleTag:
                        subsublocation = "style in " + sublocation
                        Globals.checkXMLNoSubelements(subsubelement,
                                                      subsublocation, 'fyt4')
                        css = idStyle = None
                        for attrib, value in subsubelement.items():
                            if attrib == "css":
                                css = value
                            elif attrib == "id":
                                idStyle = value
                            else:
                                logging.warning(
                                    "Unprocessed '{}' attribute ({}) in style subsubelement"
                                    .format(attrib, value))
                        if Globals.debugFlag: assert (css or idStyle)
                        SFM = None
                        if css == "font-style:italic": SFM = '\\it'
                        elif css == "font-style:italic;font-weight:bold":
                            SFM = '\\bdit'
                        elif css == "color:#FF0000":
                            SFM = '\\em'
                        elif css == "font-size: x-small; color:#8B8378":
                            SFM = '\\add'
                        elif css is None and idStyle == 'cl:divineName':
                            SFM = '\\nd'
                        else:
                            print("css is", css, "idStyle is", idStyle)
                            halt
                        sText, sTail = subsubelement.text.strip(
                        ), subsubelement.tail
                        if Globals.debugFlag: assert (sText)
                        if SFM: vText += SFM + ' ' + sText + SFM + '*'
                        else:
                            vText += '\\sc ' + '[' + css + ']' + sText + '\\sc* '  # Use sc for unknown styles
                        if sTail: vText += sTail.strip()
                    else:
                        logging.error(
                            "Expected to find {} but got '{}' in {}".format(
                                ZefaniaXMLBible.styleTag, subsubelement.tag,
                                sublocation))

            elif subelement.tag == ZefaniaXMLBible.styleTag:
                sublocation = "style in " + location
                Globals.checkXMLNoSubelements(subelement, sublocation, 'f5gh')
                css = idStyle = None
                for attrib, value in subelement.items():
                    if attrib == "css":
                        css = value
                    elif attrib == "id":
                        idStyle = value
                    else:
                        logging.warning(
                            "Unprocessed '{}' attribute ({}) in style subelement"
                            .format(attrib, value))
                if Globals.debugFlag: assert (css or idStyle)
                SFM = None
                if css == "font-style:italic": SFM = '\\it'
                elif css == "font-style:italic;font-weight:bold":
                    SFM = '\\bdit'
                elif css == "color:#FF0000":
                    SFM = '\\em'
                elif css == "font-size: x-small; color:#8B8378":
                    SFM = '\\add'
                elif css is None and idStyle == 'cl:divineName':
                    SFM = '\\nd'
                else:
                    print("css is", css, "idStyle is", idStyle)
                    halt
                sText, sTail = subelement.text.strip(), subelement.tail
                if Globals.debugFlag: assert (sText)
                if SFM: vText += SFM + ' ' + sText + SFM + '*'
                else:
                    vText += '\\sc ' + '[' + css + ']' + sText + '\\sc* '  # Use sc for unknown styles
                if sTail: vText += sTail.strip()

            elif subelement.tag == ZefaniaXMLBible.breakTag:
                sublocation = "line break in " + location
                Globals.checkXMLNoText(subelement, sublocation, 'c1d4')
                Globals.checkXMLNoSubelements(subelement, sublocation, 'g4g8')
                art = None
                for attrib, value in subelement.items():
                    if attrib == "art":
                        art = value
                    else:
                        logging.warning(
                            "Unprocessed '{}' attribute ({}) in style subelement"
                            .format(attrib, value))
                if Globals.debugFlag: assert (art == 'x-nl')
                #print( BBB, chapterNumber, verseNumber )
                #assert( vText )
                if vText:
                    thisBook.appendLine('v', verseNumber + ' ' + vText)
                    vText = ''
                thisBook.appendLine(
                    'm',
                    subelement.tail.strip() if subelement.tail else '')
                #bTail = subelement.tail
                #if bTail: vText = bTail.strip()
            else:
                logging.error(
                    "Expected to find NOTE or STYLE but got '{}' in {}".format(
                        subelement.tag, location))

        if vText:  # This is the main text of the verse (follows the verse milestone)
            if '\n' in vText:
                print(
                    "ZefaniaXMLBible.__validateAndExtractVerse: vText {} {}:{} '{}'"
                    .format(BBB, chapterNumber, verseNumber, vText))
                vText = vText.replace('\n', ' ')
            thisBook.appendLine('v', verseNumber + ' ' + vText)

Exemplo n.º 7

0

Exibir arquivo

class USFMMarkersConverter:
    """
    Class for reading, validating, and converting USFMMarkers.
    This is only intended as a transitory class (used at start-up).
    The USFMMarkers class has functions more generally useful.
    """
    def __init__(
            self):  # We can't give this parameters because of the singleton
        """
        Constructor: expects the filepath of the source XML file.
        Loads (and crudely validates the XML file) into an element tree.
        """
        self._filenameBase = "USFMMarkers"

        # These fields are used for parsing the XML
        self._treeTag = "USFMMarkers"
        self._headerTag = "header"
        self._mainElementTag = "USFMMarker"

        # These fields are used for automatically checking/validating the XML
        self._compulsoryAttributes = ()
        self._optionalAttributes = ()
        self._uniqueAttributes = self._compulsoryAttributes + self._optionalAttributes
        self._compulsoryElements = (
            "nameEnglish",
            "marker",
            "compulsory",
            "level",
            "numberable",
            "nests",
            "hasContent",
            "printed",
            "closed",
            "occursIn",
            "deprecated",
        )
        self._optionalElements = ("description", )
        #self._uniqueElements = self._compulsoryElements + self.optionalElements
        self._uniqueElements = (
            "nameEnglish",
            "marker",
        )

        # These are fields that we will fill later
        self._XMLheader, self._XMLtree = None, None
        self.__DataDicts = {}  # Used for import
        self.titleString = self.ProgVersion = self.dateString = ''

    # end of __init__

    def loadAndValidate(self, XMLFilepath=None):
        """
        Loads (and crudely validates the XML file) into an element tree.
            Allows the filepath of the source XML file to be specified, otherwise uses the default.
        """
        if self._XMLtree is None:  # We mustn't have already have loaded the data
            if XMLFilepath is None:
                XMLFilepath = os.path.join(
                    os.path.dirname(__file__), "DataFiles",
                    self._filenameBase + ".xml")  # Relative to module, not cwd
            self.__load(XMLFilepath)
            if Globals.strictCheckingFlag:
                self.__validate()
        else:  # The data must have been already loaded
            if XMLFilepath is not None and XMLFilepath != self.__XMLFilepath:
                logging.error(
                    _("Bible books codes are already loaded -- your different filepath of '{}' was ignored"
                      ).format(XMLFilepath))
        return self

    # end of loadAndValidate

    def __load(self, XMLFilepath):
        """
        Load the source XML file and remove the header from the tree.
        Also, extracts some useful elements from the header element.

        XMLFilepath: path of the XML file to parse (must be non-empty).

        Afterwards self._XMLtree is the parsed root element.  If its first
        child is the header, that child is removed from the tree and kept in
        self._XMLheader (and, as before, in self.XMLheader), and ProgVersion,
        dateString and titleString are read from the header's single "work"
        element.  Structural problems found here are reported with logging.
        """
        assert (XMLFilepath)
        self.__XMLFilepath = XMLFilepath
        assert (self._XMLtree is None or len(self._XMLtree) == 0
                )  # Make sure we're not doing this twice

        if Globals.verbosityLevel > 2:
            print(
                _("Loading USFMMarkers XML file from '{}'...").format(
                    self.__XMLFilepath))
        self._XMLtree = ElementTree().parse(self.__XMLFilepath)
        assert (self._XMLtree)  # Fail here if we didn't load anything at all

        if self._XMLtree.tag == self._treeTag:
            header = self._XMLtree[0]
            if header.tag == self._headerTag:
                self._XMLheader = header  # The holder prepared by __init__
                self.XMLheader = header  # Older public name, kept for callers
                self._XMLtree.remove(header)
                Globals.checkXMLNoText(header, "header")
                Globals.checkXMLNoTail(header, "header")
                Globals.checkXMLNoAttributes(header, "header")
                if len(header) > 1:
                    logging.info(_("Unexpected elements in header"))
                elif len(header) == 0:
                    logging.info(_("Missing work element in header"))
                else:
                    work = header[0]
                    Globals.checkXMLNoText(work, "work in header")
                    Globals.checkXMLNoTail(work, "work in header")
                    Globals.checkXMLNoAttributes(work, "work in header")
                    if work.tag == "work":
                        self.ProgVersion = work.find("version").text
                        self.dateString = work.find("date").text
                        self.titleString = work.find("title").text
                    else:
                        logging.warning(_("Missing work element in header"))
            else:
                # Translate the template first, then fill it in (formatting
                # inside _() would look up the already-formatted string)
                logging.warning(
                    _("Missing header element (looking for '{}' tag)").format(
                        self._headerTag))
            # Note: when no header was found, 'header' is simply the first
            # child of the tree, and it is that element's tail checked here
            if header.tail is not None and header.tail.strip():
                logging.error(
                    _("Unexpected '{}' tail data after header").format(
                        header.tail))  # Was an undefined name (NameError)
        else:
            logging.error(
                _("Expected to load '{}' but got '{}'").format(
                    self._treeTag, self._XMLtree.tag))

    # end of __load

    def __validate(self):
        """
        Check/validate the loaded data.

        Every child of self._XMLtree carrying the main element tag is checked
        for: stray text/tail, compulsory, optional and unexpected attributes,
        compulsory, optional and unexpected subelements, and repeated values
        in the attributes/elements that are listed as unique.  Children with
        any other tag are reported as unexpected.

        Problems found here are reported with logging calls (the
        Globals.checkXML* helpers are defined elsewhere).  The record number
        in the messages is the 0-based position j of the child in the tree.
        """
        assert (self._XMLtree)

        # One list of already-seen values per unique element/attribute name
        uniqueDict = {}
        for elementName in self._uniqueElements:
            uniqueDict["Element_" + elementName] = []
        for attributeName in self._uniqueAttributes:
            uniqueDict["Attribute_" + attributeName] = []

        for j, element in enumerate(self._XMLtree):
            if element.tag == self._mainElementTag:
                Globals.checkXMLNoText(element, element.tag)
                Globals.checkXMLNoTail(element, element.tag)
                if not self._compulsoryAttributes and not self._optionalAttributes:
                    Globals.checkXMLNoAttributes(element, element.tag)
                if not self._compulsoryElements and not self._optionalElements:
                    Globals.checkXMLNoSubelements(element, element.tag)

                # Check compulsory attributes on this main element
                for attributeName in self._compulsoryAttributes:
                    attributeValue = element.get(attributeName)
                    if attributeValue is None:
                        logging.error(
                            _("Compulsory '{}' attribute is missing from {} element in record {}"
                              ).format(attributeName, element.tag, j))
                    elif not attributeValue:  # Present but empty (don't also report a missing one as blank)
                        logging.warning(
                            _("Compulsory '{}' attribute is blank on {} element in record {}"
                              ).format(attributeName, element.tag, j))

                # Check optional attributes on this main element
                for attributeName in self._optionalAttributes:
                    attributeValue = element.get(attributeName)
                    if attributeValue is not None:
                        if not attributeValue:
                            logging.warning(
                                _("Optional '{}' attribute is blank on {} element in record {}"
                                  ).format(attributeName, element.tag, j))

                # Check for unexpected additional attributes on this main element
                for attributeName in element.keys():
                    attributeValue = element.get(attributeName)
                    if attributeName not in self._compulsoryAttributes and attributeName not in self._optionalAttributes:
                        logging.warning(
                            _("Additional '{}' attribute ('{}') found on {} element in record {}"
                              ).format(attributeName, attributeValue,
                                       element.tag, j))

                # Check the attributes that must contain unique information (in that particular field -- doesn't check across different attributes)
                for attributeName in self._uniqueAttributes:
                    attributeValue = element.get(attributeName)
                    if attributeValue is not None:
                        if attributeValue in uniqueDict["Attribute_" +
                                                        attributeName]:
                            logging.error(
                                _("Found '{}' data repeated in '{}' field on {} element in record {}"
                                  ).format(attributeValue, attributeName,
                                           element.tag, j))
                        uniqueDict["Attribute_" +
                                   attributeName].append(attributeValue)

                # Get the marker to use as a record ID.  A record without a
                #   marker must not crash the validator: it gets None as its
                #   ID and is reported by the compulsory-element check below.
                markerElement = element.find("marker")
                marker = None if markerElement is None else markerElement.text

                # Check compulsory elements
                for elementName in self._compulsoryElements:
                    found = element.find(elementName)
                    if found is None:
                        logging.error(
                            _("Compulsory '{}' element is missing in record with marker '{}' (record {})"
                              ).format(elementName, marker, j))
                    elif not found.text:
                        logging.warning(
                            _("Compulsory '{}' element is blank in record with marker '{}' (record {})"
                              ).format(elementName, marker, j))

                # Check optional elements
                for elementName in self._optionalElements:
                    found = element.find(elementName)
                    if found is not None and not found.text:
                        logging.warning(
                            _("Optional '{}' element is blank in record with marker '{}' (record {})"
                              ).format(elementName, marker, j))

                # Check for unexpected additional elements
                for subelement in element:
                    if subelement.tag not in self._compulsoryElements and subelement.tag not in self._optionalElements:
                        logging.warning(
                            _("Additional '{}' element ('{}') found in record with marker '{}' (record {})"
                              ).format(subelement.tag, subelement.text, marker,
                                       j))

                # Check the elements that must contain unique information (in that particular element -- doesn't check across different elements)
                for elementName in self._uniqueElements:
                    found = element.find(elementName)
                    if found is not None:
                        text = found.text
                        if text in uniqueDict["Element_" + elementName]:
                            logging.error(
                                _("Found '{}' data repeated in '{}' element in record with marker '{}' (record {})"
                                  ).format(text, elementName, marker, j))
                        uniqueDict["Element_" + elementName].append(text)
            else:
                logging.warning(
                    _("Unexpected element: {} in record {}").format(
                        element.tag, j))
            if element.tail is not None and element.tail.strip():
                logging.error(
                    _("Unexpected '{}' tail data after {} element in record {}"
                      ).format(element.tail, element.tag, j))
        if self._XMLtree.tail is not None and self._XMLtree.tail.strip():
            logging.error(
                _("Unexpected '{}' tail data after {} element").format(
                    self._XMLtree.tail, self._XMLtree.tag))

    # end of __validate

    def __str__(self):
        """
        This method returns the string representation of a Bible book code.

        @return: the name of a Bible object formatted as a string
        @rtype: string
        """
        indent = 2
        result = "USFMMarkersConverter object"
        if self.titleString:
            result += ('\n' if result else
                       '') + ' ' * indent + _("Title: {}").format(
                           self.titleString)
        if self.ProgVersion:
            result += ('\n' if result else
                       '') + ' ' * indent + _("Version: {}").format(
                           self.ProgVersion)
        if self.dateString:
            result += ('\n' if result else ''
                       ) + ' ' * indent + _("Date: {}").format(self.dateString)
        if self._XMLtree is not None:
            result += ('\n' if result else
                       '') + ' ' * indent + _("Number of entries = {}").format(
                           len(self._XMLtree))
        return result

    # end of __str__

    def __len__(self):
        """ Returns the number of SFM markers loaded. """
        return len(self._XMLtree)

    # end of __len__

    def importDataToPython(self):
        """
        Loads (and pivots) the data (not including the header) into suitable Python containers to use in a Python program.
        (Of course, you can just use the elementTree in self._XMLtree if you prefer.)
        """
        assert (self._XMLtree)
        if self.__DataDicts:  # We've already done an import/restructuring -- no need to repeat it
            return self.__DataDicts

        # Load and validate entries and create the dictionaries and lists
        # Note that the combined lists include the numbered markers, e.g., s as well as s1, s2, ...
        rawMarkerDict, numberedMarkerList, combinedMarkerDict, = OrderedDict(
        ), [], {}
        conversionDict, backConversionDict = {}, {}
        newlineMarkersList, numberedNewlineMarkersList, combinedNewlineMarkersList = [], [], []
        internalMarkersList, numberedInternalMarkersList, combinedInternalMarkersList = [], [], []
        noteMarkersList, deprecatedMarkersList = [], []
        for element in self._XMLtree:
            # Get the required information out of the tree for this element
            # Start with the compulsory elements
            nameEnglish = element.find(
                "nameEnglish"
            ).text  # This name is really just a comment element
            marker = element.find("marker").text
            if marker.lower() != marker:
                logging.error(
                    _("Marker '{}' should be lower case").format(marker))
            compulsory = element.find("compulsory").text
            if compulsory not in ("Yes", "No"):
                logging.error(
                    _("Unexpected '{}' compulsory field for marker '{}'").
                    format(compulsory, marker))
            level = element.find("level").text
            compulsoryFlag = compulsory == "Yes"
            if level == "Newline":
                newlineMarkersList.append(marker)
                combinedNewlineMarkersList.append(marker)
            elif level == "Internal":
                internalMarkersList.append(marker)
            elif level == "Note":
                noteMarkersList.append(marker)
            else:
                logging.error(
                    _("Unexpected '{}' level field for marker '{}'").format(
                        level, marker))
            numberable = element.find("numberable").text
            if numberable not in ("Yes", "No"):
                logging.error(
                    _("Unexpected '{}' numberable field for marker '{}'").
                    format(numberable, marker))
            numberableFlag = numberable == "Yes"
            if numberableFlag and level == "Character":
                logging.error(
                    _("Unexpected '{}' numberable field for character marker '{}'"
                      ).format(numberable, marker))
            nests = element.find("nests").text
            if nests not in ("Yes", "No"):
                logging.error(
                    _("Unexpected '{}' nests field for marker '{}'").format(
                        nests, marker))
            nestsFlag = nests == "Yes"
            hasContent = element.find("hasContent").text
            if hasContent not in ("Always", "Never", "Sometimes"):
                logging.error(
                    _("Unexpected '{}' hasContent field for marker '{}'").
                    format(hasContent, marker))
            printed = element.find("printed").text
            if printed not in ("Yes", "No"):
                logging.error(
                    _("Unexpected '{}' printed field for marker '{}'").format(
                        printed, marker))
            printedFlag = printed == "Yes"
            closed = element.find("closed").text
            if closed not in ("No", "Always", "Optional"):
                logging.error(
                    _("Unexpected '{}' closed field for marker '{}'").format(
                        closed, marker))
            occursIn = element.find("occursIn").text
            if occursIn not in ("Header", "Introduction", "Numbering", "Text",
                                "Canonical Text", "Poetry", "Text, Poetry",
                                "Acrostic verse", "Table row", "Footnote",
                                "Cross-reference", "Front and back matter"):
                logging.error(
                    _("Unexpected '{}' occursIn field for marker '{}'").format(
                        occursIn, marker))
            deprecated = element.find("deprecated").text
            if deprecated not in ("Yes", "No"):
                logging.error(
                    _("Unexpected '{}' deprecated field for marker '{}'").
                    format(deprecated, marker))
            deprecatedFlag = deprecated == "Yes"

            # The optional elements are set to None if they don't exist
            #closed = None if element.find("closed") is None else element.find("closed").text
            #if closed is not None and closed not in ( "No", "Always", "Optional" ): logging.error( _("Unexpected '{}' closed field for marker '{}'").format( closed, marker ) )
            #if level=="Character" and closed is None: logging.error( _("Entry for character marker '{}' doesn't have a \"closed\" field").format( marker ) )
            description = None if element.find(
                "description") is None else element.find("description").text
            if description is not None: assert (description)

            # Now put it into my dictionaries and lists for easy access
            #   The marker is lowercase by definition
            if "marker" in self._uniqueElements:
                assert (marker
                        not in rawMarkerDict)  # Shouldn't be any duplicates
            rawMarkerDict[marker] = {
                "compulsoryFlag": compulsoryFlag,
                "level": level,
                "numberableFlag": numberableFlag,
                "nestsFlag": nestsFlag,
                "hasContent": hasContent,
                "occursIn": occursIn,
                "printedFlag": printedFlag,
                "closed": closed,
                "deprecatedFlag": deprecatedFlag,
                "description": description,
                "nameEnglish": nameEnglish
            }
            combinedMarkerDict[marker] = marker
            if numberableFlag:  # We have some extra work to do
                conversionDict[marker] = marker + '1'
                for suffix in (
                        '1234'):  # These are the suffix digits that we allow
                    numberedMarker = marker + suffix
                    backConversionDict[numberedMarker] = marker
                    numberedMarkerList.append(numberedMarker)
                    combinedMarkerDict[numberedMarker] = marker
                    if marker in newlineMarkersList:
                        numberedNewlineMarkersList.append(numberedMarker)
                        combinedNewlineMarkersList.append(numberedMarker)
                    else:
                        numberedInternalMarkersList.append(numberedMarker)
                        combinedInternalMarkersList.append(numberedMarker)
                    if deprecatedFlag:
                        deprecatedMarkersList.append(numberedMarker)
            else:  # it's not numberable
                numberedMarkerList.append(marker)
                if marker in newlineMarkersList:
                    numberedNewlineMarkersList.append(marker)
                else:
                    numberedInternalMarkersList.append(marker)
                if deprecatedFlag: deprecatedMarkersList.append(marker)

        #print( conversionDict ); print( backConversionDict )
        #print( "newlineMarkersList", len(newlineMarkersList), newlineMarkersList )
        #print( "numberedNewlineMarkersList", len(numberedNewlineMarkersList), numberedNewlineMarkersList )
        #print( "combinedNewlineMarkersList", len(combinedNewlineMarkersList), combinedNewlineMarkersList )
        #print( "internalMarkersList", len(internalMarkersList), internalMarkersList )
        #print( "deprecatedMarkersList", len(deprecatedMarkersList), deprecatedMarkersList )
        self.__DataDicts = {
            "rawMarkerDict": rawMarkerDict,
            "numberedMarkerList": numberedMarkerList,
            "combinedMarkerDict": combinedMarkerDict,
            "conversionDict": conversionDict,
            "backConversionDict": backConversionDict,
            "newlineMarkersList": newlineMarkersList,
            "numberedNewlineMarkersList": numberedNewlineMarkersList,
            "combinedNewlineMarkersList": combinedNewlineMarkersList,
            "internalMarkersList": internalMarkersList,
            "numberedInternalMarkersList": numberedInternalMarkersList,
            "combinedInternalMarkersList": combinedInternalMarkersList,
            "noteMarkersList": noteMarkersList,
            "deprecatedMarkersList": deprecatedMarkersList,
        }
        return self.__DataDicts  # Just delete any of the dictionaries that you don't need

    # end of importDataToPython

    def pickle(self, filepath=None):
        """
        Writes the information tables to a .pickle file that can be easily loaded into a Python3 program.

        filepath: destination file.  When not given, the file
            <filenameBase>_Tables.pickle is written into a DerivedFiles
            folder beside the source XML file, and that folder is created
            if it doesn't exist yet.
        """
        import pickle

        assert (self._XMLtree)
        self.importDataToPython()
        assert (self.__DataDicts)

        if not filepath:
            folder = os.path.join(
                os.path.split(self.__XMLFilepath)[0], "DerivedFiles/")
            # exist_ok avoids the check-then-create race of exists() + mkdir()
            os.makedirs(folder, exist_ok=True)
            filepath = os.path.join(folder,
                                    self._filenameBase + "_Tables.pickle")
        if Globals.verbosityLevel > 1:
            print(_("Exporting to {}...").format(filepath))
        with open(filepath, 'wb') as myFile:
            pickle.dump(self.__DataDicts, myFile)

    # end of pickle

    def exportDataToPython(self, filepath=None):
        """
        Writes the information tables to a .py file that can be cut and pasted into a Python program.

        filepath: destination file.  When not given, the file
            <filenameBase>_Tables.py is written into a DerivedFiles folder
            beside the source XML file, and that folder is created if it
            doesn't exist yet.

        Each container produced by importDataToPython() is written as one
        assignment statement, in the order they appear in that result.
        """
        def countFields(entries):
            """Field count of the first entry in entries (0 if there are none)."""
            for firstEntry in entries:  # We only check the first entry we get
                if isinstance(firstEntry, (tuple, dict, list)):
                    return len(firstEntry)
                return 1
            return 0  # Empty containers used to crash the export

        # end of countFields

        def exportPythonDict(theFile, theDict, dictName, keyComment,
                             fieldsComment):
            """Exports theDict to theFile (entries sorted by key)."""
            assert (isinstance(theDict, dict))
            fieldsCount = countFields(theDict.values())
            theFile.write(
                "{} = {{\n  # Key is {}\n  # Fields ({}) are: {}\n".format(
                    dictName, keyComment, fieldsCount, fieldsComment))
            for dictKey in sorted(theDict.keys()):
                theFile.write('  {}: {},\n'.format(repr(dictKey),
                                                   repr(theDict[dictKey])))
            theFile.write("}}\n# end of {} ({} entries)\n\n".format(
                dictName, len(theDict)))

        # end of exportPythonDict

        def exportPythonOrderedDict(theFile, theDict, dictName, keyComment,
                                    fieldsComment):
            """Exports theDict to theFile (entries kept in their own order)."""
            assert (isinstance(theDict, OrderedDict))
            fieldsCount = countFields(theDict.values())
            theFile.write(
                '{} = OrderedDict([\n    # Key is {}\n    # Fields ({}) are: {}\n'
                .format(dictName, keyComment, fieldsCount, fieldsComment))
            for dictKey in theDict.keys():
                theFile.write('  ({}, {}),\n'.format(repr(dictKey),
                                                     repr(theDict[dictKey])))
            # No comma after the closing bracket: "name = OrderedDict([...]),"
            #   would bind the name to a one-item tuple in the generated file
            theFile.write("]) # end of {} ({} entries)\n\n".format(
                dictName, len(theDict)))

        # end of exportPythonOrderedDict

        def exportPythonList(theFile, theList, listName, dummy, fieldsComment):
            """Exports theList to theFile (dummy is unused; it keeps the three exporters' signatures alike)."""
            assert (isinstance(theList, list))
            fieldsCount = countFields(theList)
            theFile.write('{} = [\n    # Fields ({}) are: {}\n'.format(
                listName, fieldsCount, fieldsComment))
            for j, entry in enumerate(theList):
                theFile.write('  {}, # {}\n'.format(repr(entry), j))
            # No comma after the closing bracket (see exportPythonOrderedDict)
            theFile.write("] # end of {} ({} entries)\n\n".format(
                listName, len(theList)))

        # end of exportPythonList

        assert (self._XMLtree)
        self.importDataToPython()
        assert (self.__DataDicts)

        if not filepath:
            folder = os.path.join(
                os.path.split(self.__XMLFilepath)[0], "DerivedFiles")
            os.makedirs(folder, exist_ok=True)  # Same default folder as pickle()
            filepath = os.path.join(folder, self._filenameBase + "_Tables.py")
        if Globals.verbosityLevel > 1:
            print(_("Exporting to {}...").format(filepath))
        # The header below announces a UTF-8 file, so don't depend on the
        #   platform's default encoding
        with open(filepath, 'wt', encoding='utf-8') as myFile:
            myFile.write("# {}\n#\n".format(filepath))
            # NOTE(review): ProgVersion here is a name from outside this
            #   method (presumably the module's own version string), not
            #   self.ProgVersion which comes from the XML header -- confirm
            myFile.write(
                "# This UTF-8 file was automatically generated by USFMMarkers.py V{} on {}\n#\n"
                .format(ProgVersion, datetime.now()))
            if self.titleString:
                myFile.write("# {} data\n".format(self.titleString))
            if self.ProgVersion:
                myFile.write("#  Version: {}\n".format(self.ProgVersion))
            if self.dateString:
                myFile.write("#  Date: {}\n#\n".format(self.dateString))
            myFile.write(
                "#   {} {} loaded from the original XML file.\n#\n\n".format(
                    len(self._XMLtree), self._treeTag))
            myFile.write("from collections import OrderedDict\n\n")
            # For each container: (export function, key comment, fields comment)
            dictInfo = {
                "rawMarkerDict":
                (exportPythonOrderedDict,
                 "rawMarker (in the original XML order)", "specified"),
                "numberedMarkerList":
                (exportPythonList, "marker", "rawMarker"),
                "combinedMarkerDict":
                (exportPythonDict, "marker", "rawMarker"),
                "conversionDict":
                (exportPythonDict, "rawMarker", "numberedMarker"),
                "backConversionDict": (exportPythonDict, "numberedMarker",
                                       "rawMarker"),
                "newlineMarkersList": (exportPythonList, "", "rawMarker"),
                "numberedNewlineMarkersList": (exportPythonList, "",
                                               "rawMarker"),
                "combinedNewlineMarkersList": (exportPythonList, "",
                                               "rawMarker"),
                "internalMarkersList": (exportPythonList, "", "rawMarker"),
                "numberedInternalMarkersList": (exportPythonList, "",
                                                "rawMarker"),
                "combinedInternalMarkersList": (exportPythonList, "",
                                                "rawMarker"),
                "noteMarkersList": (exportPythonList, "", "rawMarker"),
                "deprecatedMarkersList": (exportPythonList, "", "rawMarker")
            }
            for dictName in self.__DataDicts:
                exportFunction, keyComment, fieldsComment = dictInfo[dictName]
                exportFunction(myFile, self.__DataDicts[dictName], dictName,
                               keyComment, fieldsComment)
            myFile.write("# end of {}".format(os.path.basename(filepath)))

    # end of exportDataToPython

    def exportDataToJSON(self, filepath=None):
        """
        Dumps the imported data tables to a .json file (indented by two spaces).

        If no filepath is given, the output goes to <filenameBase>_Tables.json
            in a "DerivedFiles" folder beside the source XML file.

        See http://en.wikipedia.org/wiki/JSON.
        """
        import json

        assert (self._XMLtree)
        self.importDataToPython()
        assert (self.__DataDicts)

        outputFilepath = filepath
        if not outputFilepath:  # Work out the default location
            sourceFolder = os.path.split(self.__XMLFilepath)[0]
            outputFilepath = os.path.join(sourceFolder, "DerivedFiles",
                                          self._filenameBase + "_Tables.json")
        if Globals.verbosityLevel > 1:
            progressMessage = _("Exporting to {}...")
            print(progressMessage.format(outputFilepath))
        with open(outputFilepath, 'wt') as jsonFile:
            json.dump(self.__DataDicts, jsonFile, indent=2)

    # end of exportDataToJSON

    def exportDataToC(self, filepath=None):
        """
        Intended to write the information tables to .h and .c files that can be included in C and C++ programs.

        NOTE: As written, this method always raises Exception("C export not written yet, sorry.")
            once the data has been imported, so none of the file-writing code below the raise
            statement is ever executed.

        NOTE: The (optional) filepath should not have the file extension specified -- this is added automatically.
        """
        def exportPythonDict(hFile, cFile, theDict, dictName, sortedBy,
                             structure):
            """
            Exports theDict to the .h and .c files.

            Writes a typedef for one entry to hFile (one member per ';'-separated
                declaration in structure) and a const array of entries to cFile,
                with the entries ordered by sorted dictionary key.
            sortedBy is only used in the explanatory comment written to cFile.
            """
            def convertEntry(entry):
                """
                Convert a tuple or dict entry into a comma-separated string of C initialisers.

                None becomes an empty string literal, strings are double-quoted (with any
                    embedded double quotes escaped) and integers are written as they are.
                Any other field type (or entry type) is logged as an error and left out.
                """
                result = ""
                if isinstance(entry, tuple):
                    for field in entry:
                        if result: result += ", "  # Separate the fields
                        if field is None: result += '""'
                        elif isinstance(field, str):
                            result += '"' + str(field).replace('"',
                                                               '\\"') + '"'
                        elif isinstance(field, int):
                            result += str(field)
                        else:
                            logging.error(
                                _("Cannot convert unknown field type '{}' in entry '{}'"
                                  ).format(field, entry))
                elif isinstance(entry, dict):
                    # Dictionary fields are written in sorted key order
                    for key in sorted(entry.keys()):
                        field = entry[key]
                        if result: result += ", "  # Separate the fields
                        if field is None: result += '""'
                        elif isinstance(field, str):
                            result += '"' + str(field).replace('"',
                                                               '\\"') + '"'
                        elif isinstance(field, int):
                            result += str(field)
                        else:
                            logging.error(
                                _("Cannot convert unknown field type '{}' in entry '{}'"
                                  ).format(field, entry))
                else:
                    logging.error(
                        _("Can't handle this type of entry yet: {}").format(
                            repr(entry)))
                return result

            # end of convertEntry

            # Peek at a single entry in order to count the fields
            # NOTE(review): fieldsCount is never assigned if theDict is empty, so the
            #   cFile.write() below would then fail with a NameError
            for dictKey in theDict.keys():  # Have to iterate this :(
                fieldsCount = len(
                    theDict[dictKey]
                ) + 1  # Add one since we include the key in the count
                break  # We only check the first (random) entry we get

            # Write the struct declaration to the header file, one member per line
            #hFile.write( "typedef struct {}EntryStruct { {} } {}Entry;\n\n".format( dictName, structure, dictName ) )
            hFile.write("typedef struct {}EntryStruct {{\n".format(dictName))
            for declaration in structure.split(';'):
                adjDeclaration = declaration.strip()
                if adjDeclaration:
                    hFile.write("    {};\n".format(adjDeclaration))
            hFile.write("}} {}Entry;\n\n".format(dictName))

            # Write the table itself to the .c file
            cFile.write(
                "const static {}Entry\n {}[{}] = {{\n  // Fields ({}) are {}\n  // Sorted by {}\n"
                .format(dictName, dictName, len(theDict), fieldsCount,
                        structure, sortedBy))
            for dictKey in sorted(theDict.keys()):
                if isinstance(dictKey, str):
                    cFile.write("  {{\"{}\", {}}},\n".format(
                        dictKey, convertEntry(theDict[dictKey])))
                elif isinstance(dictKey, int):
                    cFile.write("  {{{}, {}}},\n".format(
                        dictKey, convertEntry(theDict[dictKey])))
                else:
                    logging.error(
                        _("Can't handle this type of key data yet: {}").format(
                            dictKey))
            # NOTE(review): this writes "]};" although the initialiser was opened with
            #   "{" only -- the "]" looks like a stray character; confirm before enabling
            cFile.write("]}}; // {} ({} entries)\n\n".format(
                dictName, len(theDict)))

        # end of exportPythonDict

        assert (self._XMLtree)
        self.importDataToPython()
        assert (self.__DataDicts)

        # Everything after this raise is unreachable until the statement is removed
        raise Exception("C export not written yet, sorry.")
        if not filepath:
            filepath = os.path.join(
                os.path.split(self.__XMLFilepath)[0], "DerivedFiles",
                self._filenameBase + "_Tables")
        hFilepath = filepath + '.h'
        cFilepath = filepath + '.c'
        if Globals.verbosityLevel > 1:
            print(_("Exporting to {}...").format(
                cFilepath))  # Don't bother telling them about the .h file
        # Name of the include-guard macro written into the header file
        ifdefName = self._filenameBase.upper() + "_Tables_h"

        with open(hFilepath, 'wt') as myHFile, open(cFilepath,
                                                    'wt') as myCFile:
            # Both files start with the same comment header
            myHFile.write("// {}\n//\n".format(hFilepath))
            myCFile.write("// {}\n//\n".format(cFilepath))
            lines = "// This UTF-8 file was automatically generated by USFMMarkers.py V{} on {}\n//\n".format(
                ProgVersion, datetime.now())
            myHFile.write(lines)
            myCFile.write(lines)
            if self.titleString:
                lines = "// {} data\n".format(self.titleString)
                myHFile.write(lines)
                myCFile.write(lines)
            if self.ProgVersion:
                lines = "//  Version: {}\n".format(self.ProgVersion)
                myHFile.write(lines)
                myCFile.write(lines)
            if self.dateString:
                lines = "//  Date: {}\n//\n".format(self.dateString)
                myHFile.write(lines)
                myCFile.write(lines)
            myCFile.write(
                "//   {} {} loaded from the original XML file.\n//\n\n".format(
                    len(self._XMLtree), self._treeTag))
            # Open the include guard in the header and include the header from the .c file
            myHFile.write("\n#ifndef {}\n#define {}\n\n".format(
                ifdefName, ifdefName))
            myCFile.write('#include "{}"\n\n'.format(
                os.path.basename(hFilepath)))

            # C type names substituted into the struct declarations below
            CHAR = "const unsigned char"
            BYTE = "const int"
            # Maps each data dictionary name to a 2-tuple:
            #   (description of the sort key, ';'-separated C struct member declarations)
            # NOTE(review): these dictionary names and struct members describe Bible book
            #   data rather than markers -- presumably copied from another exporter; confirm
            dictInfo = {
                "referenceNumberDict":
                ("referenceNumber (integer 1..255)",
                 "{} referenceNumber; {}* ByzantineAbbreviation; {}* CCELNumberString; {}* NETBibleAbbreviation; {}* OSISAbbreviation; {} ParatextAbbreviation[3+1]; {} ParatextNumberString[2+1]; {}* SBLAbbreviation; {}* SwordAbbreviation; {}* nameEnglish; {}* numExpectedChapters; {}* possibleAlternativeBooks; {} marker[3+1];"
                 .format(BYTE, CHAR, CHAR, CHAR, CHAR, CHAR, CHAR, CHAR, CHAR,
                         CHAR, CHAR, CHAR, CHAR)),
                "rawMarkerDict":
                ("marker",
                 "{} marker[3+1]; {}* ByzantineAbbreviation; {}* CCELNumberString; {} referenceNumber; {}* NETBibleAbbreviation; {}* OSISAbbreviation; {} ParatextAbbreviation[3+1]; {} ParatextNumberString[2+1]; {}* SBLAbbreviation; {}* SwordAbbreviation; {}* nameEnglish; {}* numExpectedChapters; {}* possibleAlternativeBooks;"
                 .format(CHAR, CHAR, CHAR, BYTE, CHAR, CHAR, CHAR, CHAR, CHAR,
                         CHAR, CHAR, CHAR, CHAR)),
                "CCELDict":
                ("CCELNumberString",
                 "{}* CCELNumberString; {} referenceNumber; {} marker[3+1];".
                 format(CHAR, BYTE, CHAR)),
                "SBLDict":
                ("SBLAbbreviation",
                 "{}* SBLAbbreviation; {} referenceNumber; {} marker[3+1];".
                 format(CHAR, BYTE, CHAR)),
                "EnglishNameDict":
                ("nameEnglish",
                 "{}* nameEnglish; {} referenceNumber; {} marker[3+1];".format(
                     CHAR, BYTE, CHAR))
            }

            # Export every data dictionary
            # NOTE: dictInfo[dictName] raises KeyError for any dictionary not listed above
            for dictName, dictData in self.__DataDicts.items():
                exportPythonDict(myHFile, myCFile, dictData, dictName,
                                 dictInfo[dictName][0], dictInfo[dictName][1])

            # Close the include guard and finish both files with a trailer comment
            myHFile.write("#endif // {}\n\n".format(ifdefName))
            myHFile.write("// end of {}".format(os.path.basename(hFilepath)))
            myCFile.write("// end of {}".format(os.path.basename(cFilepath)))

Exemplo n.º 8

0

Exibir arquivo

class BibleOrganizationalSystemsConverter:
    """
    Class for handling and converting BibleOrganizationalSystems.
    """
    def __init__(self):
        """
        Constructor: takes no arguments and doesn't load the XML file itself
            (that's done later by loadAndValidate).
        Sets up the tag/attribute/element names used for parsing and checking the XML,
            and loads the other data tables that are needed for cross-checking.
        """
        self._filenameBase = "BibleOrganizationalSystems"

        # Tag names used when parsing the XML
        self._treeTag = "BibleOrganizationalSystems"
        self._headerTag = "header"
        self._mainElementTag = "BibleOrganizationalSystem"

        # Attribute names used when automatically checking/validating the XML
        self._compulsoryAttributes = ("type", )
        self._optionalAttributes = ()
        self._uniqueAttributes = ()

        # Element names used when automatically checking/validating the XML
        self._compulsoryElements = ("referenceAbbreviation", "languageCode")
        self._optionalElements = (
            "name", "completionDate", "publicationDate", "copyright",
            "versificationSystem", "punctuationSystem", "bookOrderSystem",
            "booksNamesSystem", "translator", "publisher", "derivedFrom",
            "usesText", "includesBooks", "url", "comment")
        self._uniqueElements = ()
        self._allowedMultiple = ("name", "translator", "derivedFrom",
                                 "usesText", "url", "comment")

        # Nothing has been loaded yet, so these all start off empty
        self.title = None
        self.version = None
        self.date = None
        self.header = None
        self._XMLtree = None
        self.__dataDicts = None

        # Load the supporting data tables that are needed for proper checking
        self._ISOLanguages = ISO_639_3_Languages().loadData()
        self._BibleBookOrderSystems = BibleBookOrderSystems().loadData()
        self._BiblePunctuationSystems = BiblePunctuationSystems().loadData()
        versificationSystems = BibleVersificationSystems()
        self._BibleVersificationSystems = versificationSystems.loadData()
        self._BibleBooksNamesSystems = BibleBooksNamesSystems().loadData()

    # end of BibleOrganizationalSystemsConverter.__init__

    def __str__(self):
        """
        This method returns the string representation of a Bible book code.

        @return: the name of a Bible object formatted as a string
        @rtype: string
        """
        result = ""
        if self.title: result += ('\n' if result else '') + self.title
        if self.version:
            result += ('\n' if result else '') + "  Version: {}".format(
                self.version)
        if self.date:
            result += ('\n' if result else '') + "  Date: {}".format(self.date)
        result += ('\n' if result else '') + "  Number of entries = {}".format(
            len(self._XMLtree))
        return result

    # end of BibleOrganizationalSystemsConverter.__str__

    def __len__(self):
        """ Returns the number of items loaded. """
        return len(self._XMLtree)

    # end of BibleOrganizationalSystemsConverter.__len__

    def loadAndValidate(self, XMLFilepath=None):
        """
        Loads (and crudely validates the XML file) into an element tree.
            Allows the filepath of the source XML file to be specified, otherwise uses the default.
        """
        if self._XMLtree is None:  # We mustn't have already have loaded the data
            if XMLFilepath is None:
                XMLFilepath = os.path.join(
                    os.path.dirname(__file__), "DataFiles",
                    self._filenameBase + ".xml")  # Relative to module, not cwd

            self._load(XMLFilepath)
            if Globals.strictCheckingFlag:
                self._validate()
        return self

    # end of BibleOrganizationalSystemsConverter.loadAndValidate

    def _load(self, XMLFilepath):
        """
        Load the source XML file and remove the header from the tree.
        Also, extracts some useful elements from the header element.

        XMLFilepath is the (non-empty) path of the XML file to parse.
        Afterwards self._XMLtree holds the root element without its header, and
            self.header holds the header element (if the first child had the header tag).
        self.version, self.date and self.title are set from the header's single work element;
            any of them that is missing from that element is set to None.

        Structural problems are reported through logging rather than raised.
        Fails with an AssertionError if no filepath is given, if data has already
            been loaded, or if the root element has no children.
        """
        def childText(parent, tag):
            """ Returns the text of the first child of parent with the given tag (None if there isn't one). """
            child = parent.find(tag)
            return None if child is None else child.text

        # end of childText

        assert (XMLFilepath)
        self.__XMLFilepath = XMLFilepath
        assert (self._XMLtree is None or len(self._XMLtree) == 0
                )  # Make sure we're not doing this twice

        if Globals.verbosityLevel > 2:
            print(
                _("Loading BibleOrganisationalSystems XML file from '{}'...").
                format(self.__XMLFilepath))
        self._XMLtree = ElementTree().parse(self.__XMLFilepath)
        # Use an explicit length test because truth-testing an Element is deprecated
        assert (len(self._XMLtree) > 0
                )  # Fail here if we didn't load anything at all

        if self._XMLtree.tag == self._treeTag:
            header = self._XMLtree[0]
            if header.tag == self._headerTag:
                self.header = header
                self._XMLtree.remove(header)
                if len(header) > 1:
                    logging.info(_("Unexpected elements in header"))
                elif len(header) == 0:
                    logging.info(_("Missing work element in header"))
                else:
                    work = header[0]
                    if work.tag == "work":
                        # A missing child gives None here rather than an AttributeError
                        self.version = childText(work, "version")
                        self.date = childText(work, "date")
                        self.title = childText(work, "title")
                    else:
                        logging.warning(_("Missing work element in header"))
            else:
                logging.warning(
                    _("Missing header element (looking for '{}' tag)").format(
                        self._headerTag))
        else:
            logging.error(
                _("Expected to load '{}' but got '{}'").format(
                    self._treeTag, self._XMLtree.tag))

    # end of BibleOrganizationalSystemsConverter._load

    def _validate(self):
        """
        Check/validate the loaded data.
        """
        assert (self._XMLtree)

        uniqueDict = {}
        for elementName in self._uniqueElements:
            uniqueDict["Element_" + elementName] = []
        for attributeName in self._uniqueAttributes:
            uniqueDict["Attribute_" + attributeName] = []

        expectedID = 1
        for j, element in enumerate(self._XMLtree):
            if element.tag == self._mainElementTag:
                # Check compulsory attributes on this main element
                for attributeName in self._compulsoryAttributes:
                    attributeValue = element.get(attributeName)
                    if attributeValue is None:
                        logging.error(
                            _("Compulsory '{}' attribute is missing from {} element in record {}"
                              ).format(attributeName, element.tag, j))
                    if not attributeValue:
                        logging.warning(
                            _("Compulsory '{}' attribute is blank on {} element in record {}"
                              ).format(attributeName, element.tag, j))

                # Check optional attributes on this main element
                for attributeName in self._optionalAttributes:
                    attributeValue = element.get(attributeName)
                    if attributeValue is not None:
                        if not attributeValue:
                            logging.warning(
                                _("Optional '{}' attribute is blank on {} element in record {}"
                                  ).format(attributeName, element.tag, j))

                # Check for unexpected additional attributes on this main element
                for attributeName in element.keys():
                    attributeValue = element.get(attributeName)
                    if attributeName not in self._compulsoryAttributes and attributeName not in self._optionalAttributes:
                        logging.warning(
                            _("Additional '{}' attribute ('{}') found on {} element in record {}"
                              ).format(attributeName, attributeValue,
                                       element.tag, j))

                # Check the attributes that must contain unique information (in that particular field -- doesn't check across different attributes)
                for attributeName in self._uniqueAttributes:
                    attributeValue = element.get(attributeName)
                    if attributeValue is not None:
                        if attributeValue in uniqueDict["Attribute_" +
                                                        attributeName]:
                            logging.error(
                                _("Found '{}' data repeated in '{}' field on {} element in record {}"
                                  ).format(attributeValue, attributeName,
                                           element.tag, j))
                        uniqueDict["Attribute_" +
                                   attributeName].append(attributeValue)

                ID = element.find("referenceAbbreviation").text

                # Check compulsory elements
                for elementName in self._compulsoryElements:
                    if element.find(elementName) is None:
                        logging.error(
                            _("Compulsory '{}' element is missing in record with ID '{}' (record {})"
                              ).format(elementName, ID, j))
                    elif not element.find(elementName).text:
                        logging.warning(
                            _("Compulsory '{}' element is blank in record with ID '{}' (record {})"
                              ).format(elementName, ID, j))

                # Check optional elements
                for elementName in self._optionalElements:
                    if element.find(elementName) is not None:
                        if not element.find(elementName).text:
                            logging.warning(
                                _("Optional '{}' element is blank in record with ID '{}' (record {})"
                                  ).format(elementName, ID, j))

                # Check for unexpected additional elements
                for subelement in element:
                    if subelement.tag not in self._compulsoryElements and subelement.tag not in self._optionalElements:
                        logging.warning(
                            _("Additional '{}' element ('{}') found in record with ID '{}' (record {})"
                              ).format(subelement.tag, subelement.text, ID, j))

                # Check the elements that must contain unique information (in that particular element -- doesn't check across different elements)
                for elementName in self._uniqueElements:
                    if element.find(elementName) is not None:
                        text = element.find(elementName).text
                        if text in uniqueDict["Element_" + elementName]:
                            logging.error(
                                _("Found '{}' data repeated in '{}' element in record with ID '{}' (record {})"
                                  ).format(text, elementName, ID, j))
                        uniqueDict["Element_" + elementName].append(text)

                # Special checks of particular fields
                if element.find("includesBooks") is not None:
                    bookList = element.find("includesBooks").text.split()
                    for BBB in bookList:
                        if not Globals.BibleBooksCodes.isValidReferenceAbbreviation(
                                BBB):
                            logging.critical(
                                _("Unrecognized '{}' Bible book code found in 'includesBooks' in record with ID '{}' (record {})"
                                  ).format(BBB, ID, j))
                        if bookList.count(BBB) > 1:
                            logging.error(
                                _("Multiple '{}' Bible book codes found in 'includesBooks' in record with ID '{}' (record {})"
                                  ).format(BBB, ID, j))

            else:
                logging.warning(
                    _("Unexpected element: {} in record {}").format(
                        element.tag, j))

    # end of BibleOrganizationalSystemsConverter._validate

    def importDataToPython(self):
        """
        Loads (and pivots) the data (not including the header) into suitable Python containers to use in a Python program.
        (Of course, you can just use the elementTree in self._XMLtree if you prefer.)
        """
        assert (self._XMLtree)
        if self.__dataDicts:  # We've already done an import/restructuring -- no need to repeat it
            return self.__dataDicts

        # We'll create a number of dictionaries with different elements as the key
        dataDict, indexDict, combinedIndexDict = {}, {}, {}
        for element in self._XMLtree:
            bits = {}
            # Get the required information out of the tree for this element
            # Start with the compulsory elements and type attribute
            referenceAbbreviation = element.find("referenceAbbreviation").text
            bits["referenceAbbreviation"] = referenceAbbreviation
            myType = element.get("type")
            bits["type"] = myType
            if myType not in allowedTypes:
                logging.error(
                    _("Unrecognized '{}' type for '{}' (expected one of {})").
                    format(myType, referenceAbbreviation, allowedTypes))
            languageCode = element.find("languageCode").text
            if self._ISOLanguages and not self._ISOLanguages.isValidLanguageCode(
                    languageCode):  # Check that we have a valid language code
                logging.error(
                    "Unrecognized '{}' ISO-639-3 language code in '{}' organisational system"
                    .format(languageCode, referenceAbbreviation))
            bits["languageCode"] = languageCode

            # Now work on the optional elements
            for name in ("name", "publicationDate", "versificationSystem",
                         "punctuationSystem", "bookOrderSystem",
                         "booksNamesSystem", "derivedFrom", "usesText",
                         "includesBooks"):
                for nameData in element.findall(name):
                    if name in self._allowedMultiple:  # Put multiple entries into a list
                        if name not in bits: bits[name] = [nameData.text]
                        else: bits[name].append(nameData.text)
                    else:  # Not allowed multiples
                        if name in bits:
                            logging.error(
                                _("Unexpected multiple {} elements found in {} {}"
                                  ).format(name, referenceAbbreviation,
                                           myType))
                        if name == "includesBooks":  # special handling
                            bits["includesBooks"] = nameData.text.split()
                            for BBB in bits["includesBooks"]:
                                if not Globals.BibleBooksCodes.isValidReferenceAbbreviation(
                                        BBB):
                                    logging.error(
                                        _("Unrecognized '{}' Bible book code found in 'includesBooks' in {} {}"
                                          ).format(BBB, referenceAbbreviation,
                                                   myType))
                        else:
                            bits[name] = nameData.text  # normal handling

            extension = '_' + myType
            extendedRA = referenceAbbreviation if referenceAbbreviation.endswith(
                extension) else (referenceAbbreviation + extension)
            dataDict[extendedRA] = bits
            if referenceAbbreviation in indexDict:
                indexDict[referenceAbbreviation].append(extendedRA)
            else:
                indexDict[referenceAbbreviation] = [extendedRA]
            if referenceAbbreviation in combinedIndexDict:
                combinedIndexDict[referenceAbbreviation].append(extendedRA)
            else:
                combinedIndexDict[referenceAbbreviation] = [extendedRA]
            if extendedRA != referenceAbbreviation:
                #assert( extendedRA not in combinedIndexDict )
                if extendedRA in combinedIndexDict:
                    logging.error(
                        _("Found {} in combinedIndexDict").format(extendedRA))
                combinedIndexDict[extendedRA] = [extendedRA]
        assert (len(indexDict) <= len(dataDict))
        assert (len(combinedIndexDict) >= len(indexDict))

        if Globals.strictCheckingFlag:  # We'll do quite a bit more cross-checking now
            for extendedReferenceAbbreviation, data in dataDict.items():
                #print( extendedReferenceAbbreviation, data )
                systemType = data['type']
                if systemType == 'edition':
                    if 'derivedFrom' in data:
                        logging.error(
                            _("{} shouldn't use 'derivedFrom' '{}'").format(
                                extendedReferenceAbbreviation,
                                data['derivedFrom']))
                    if 'usesText' not in data:
                        logging.error(
                            _("{} doesn't specify 'usesText'").format(
                                extendedReferenceAbbreviation))
                    else:  # have a 'usesText' list
                        for textAbbrev in data['usesText']:
                            if textAbbrev not in indexDict:
                                logging.error(
                                    _("{} specifies unknown '{}' text in 'usesText' field"
                                      ).format(extendedReferenceAbbreviation,
                                               textAbbrev))
                            elif len(indexDict[textAbbrev]
                                     ) > 1:  # it could be ambiguous
                                found = 0
                                for thisType in (
                                        'revision', 'translation',
                                        'original'):  # but not 'edition'
                                    usesTextExtended = textAbbrev + '_' + thisType
                                    if usesTextExtended in dataDict:
                                        foundOne = usesTextExtended
                                        found += 1
                                assert (found > 0)
                                if found == 1:  # ah, it's not actually ambiguous
                                    if Globals.verbosityLevel > 2:
                                        print(
                                            _("Adjusted text used for {} from the ambiguous '{}' to the extended name '{}'"
                                              ).
                                            format(
                                                extendedReferenceAbbreviation,
                                                textAbbrev, foundOne))
                                    data['usesText'].remove(textAbbrev)
                                    data['usesText'].append(foundOne)
                                else:
                                    logging.warning(
                                        _("{} specifies ambiguous '{}' (could be {}) texts in 'usesText' field"
                                          ).format(
                                              extendedReferenceAbbreviation,
                                              textAbbrev,
                                              indexDict[textAbbrev]))
                elif systemType == 'revision':
                    if 'derivedFrom' not in data:
                        logging.error(
                            _("{} doesn't specify 'derivedFrom'").format(
                                extendedReferenceAbbreviation))
                    else:
                        for df in data['derivedFrom']:
                            if df not in indexDict:
                                logging.error(
                                    _("{} specifies unknown '{}' text in 'derivedFrom' field"
                                      ).format(extendedReferenceAbbreviation,
                                               df))
                            elif len(indexDict[df]) > 1:
                                logging.warning(
                                    _("{} specifies ambiguous '{}' (could be {}) texts in 'derivedFrom' field"
                                      ).format(extendedReferenceAbbreviation,
                                               df, indexDict[df]))
                elif systemType == 'translation':
                    if 'derivedFrom' not in data:
                        logging.warning(
                            _("{} doesn't specify 'derivedFrom'").format(
                                extendedReferenceAbbreviation))
                    else:
                        for df in data['derivedFrom']:
                            if df not in indexDict:
                                logging.error(
                                    _("{} specifies unknown '{}' text in 'derivedFrom' field"
                                      ).format(extendedReferenceAbbreviation,
                                               df))
                            elif len(indexDict[df]) > 1:
                                logging.warning(
                                    _("{} specifies ambiguous '{}' (could be {}) texts in 'derivedFrom' field"
                                      ).format(extendedReferenceAbbreviation,
                                               df, indexDict[df]))
                elif systemType == 'original':
                    if 'derivedFrom' in data:
                        logging.error(
                            _("{} shouldn't use 'derivedFrom' '{}'").format(
                                extendedReferenceAbbreviation,
                                data['derivedFrom']))
                if 'versificationSystem' in data and data[
                        'versificationSystem'] not in ('None', 'Unknown'):
                    if not self._BibleVersificationSystems.isValidVersificationSystemName(
                            data['versificationSystem']):
                        extra = "\n  Available systems are {}".format(
                            self._BibleVersificationSystems.
                            getAvailableVersificationSystemNames(
                            )) if Globals.verbosityLevel > 2 else ''
                        logging.error(
                            _("Unknown '{}' versification system name in {}{}"
                              ).format(data['versificationSystem'],
                                       extendedReferenceAbbreviation, extra))
                if 'punctuationSystem' in data and data[
                        'punctuationSystem'] not in ('None', 'Unknown'):
                    if not self._BiblePunctuationSystems.isValidPunctuationSystemName(
                            data['punctuationSystem']):
                        extra = "\n  Available systems are {}".format(
                            self._BiblePunctuationSystems.
                            getAvailablePunctuationSystemNames(
                            )) if Globals.verbosityLevel > 2 else ''
                        logging.error(
                            _("Unknown '{}' punctuation system name in {}{}").
                            format(data['punctuationSystem'],
                                   extendedReferenceAbbreviation, extra))

        self.__dataDicts = dataDict, indexDict, combinedIndexDict
        return self.__dataDicts

    # end of importDataToPython

    def pickle(self, filepath=None):
        """
        Writes the information tables to a .pickle file that can be easily loaded into a Python3 program.

        If no filepath is given, the tables are written to
        <filenameBase>_Tables.pickle inside a "DerivedFiles" folder beside
        the source XML file (the folder is created if it doesn't exist yet).
        """
        import pickle

        assert (self._XMLtree)
        self.importDataToPython()
        assert (self.__dataDicts)

        if not filepath:
            folder = os.path.join(
                os.path.split(self.__XMLFilepath)[0], "DerivedFiles/")
            # exist_ok avoids the check-then-create race of exists() + mkdir()
            os.makedirs(folder, exist_ok=True)
            filepath = os.path.join(folder,
                                    self._filenameBase + "_Tables.pickle")
        if Globals.verbosityLevel > 1:
            print(_("Exporting to {}...").format(filepath))
        with open(filepath, 'wb') as myFile:
            pickle.dump(self.__dataDicts, myFile)

    # end of pickle

    def exportDataToPython(self, filepath=None):
        """
        Writes the information tables to a .py file that can be cut and pasted into a Python program.

        If no filepath is given, the tables are written to
        <filenameBase>_Tables.py inside a "DerivedFiles" folder beside the
        source XML file (the folder is created if it doesn't exist yet).
        The file is always written as UTF-8, matching the header line that
        is placed at the top of the generated file.
        """
        def exportPythonDict(theFile, theDict, dictName, keyComment,
                             fieldsComment):
            """Writes theDict to theFile as a dict literal called dictName, in sorted key order."""
            theFile.write(
                "{} = {{\n  # Key is {}\n  # Fields are: {}\n".format(
                    dictName, keyComment, fieldsComment))
            for dictKey in sorted(theDict):
                theFile.write('  {}: {},\n'.format(repr(dictKey),
                                                   theDict[dictKey]))
            theFile.write("}}\n# end of {}\n\n".format(dictName))

        # end of exportPythonDict

        assert (self._XMLtree)
        self.importDataToPython()
        assert (self.__dataDicts)
        # importDataToPython() stores the tuple it returns, so there's no need to call it again
        dataDict, indexDict, combinedIndexDict = self.__dataDicts

        if not filepath:
            folder = os.path.join(
                os.path.split(self.__XMLFilepath)[0], "DerivedFiles")
            # Make sure the default output folder is there (as pickle() does)
            os.makedirs(folder, exist_ok=True)
            filepath = os.path.join(folder, self._filenameBase + "_Tables.py")
        if Globals.verbosityLevel > 1:
            print(_("Exporting to {}...").format(filepath))

        # The header below promises UTF-8, so don't rely on the locale's default encoding
        with open(filepath, 'wt', encoding='utf-8') as myFile:
            myFile.write("# {}\n#\n".format(filepath))
            myFile.write(
                "# This UTF-8 file was automatically generated by BibleOrganizationalSystemsConverter.py V{} on {}\n#\n"
                .format(ProgVersion, datetime.now()))
            if self.title: myFile.write("# {}\n".format(self.title))
            if self.version:
                myFile.write("#  Version: {}\n".format(self.version))
            if self.date: myFile.write("#  Date: {}\n#\n".format(self.date))
            myFile.write(
                "#   {} {} entries loaded from the original XML file.\n".
                format(len(self._XMLtree), self._treeTag))
            # NOTE(review): the field descriptions below look like they were copied
            #   from a books-codes exporter -- confirm they match these tables
            exportPythonDict(
                myFile, dataDict, "dataDict", "extendedReferenceAbbreviation",
                "referenceAbbreviation, SBLAbbreviation, OSISAbbreviation, ParatextAbbreviation, ParatextNumberString, nameEnglish (comment only)"
            )
            exportPythonDict(
                myFile, indexDict, "indexDict", "referenceAbbreviation",
                "id, SBLAbbreviation, OSISAbbreviation, ParatextAbbreviation, ParatextNumberString, nameEnglish (comment only)"
            )
            exportPythonDict(
                myFile, combinedIndexDict, "combinedIndexDict",
                "referenceAbbreviation",
                "id, SBLAbbreviation, OSISAbbreviation, ParatextAbbreviation, ParatextNumberString, nameEnglish (comment only)"
            )

    # end of exportDataToPython

    def exportDataToJSON(self, filepath=None):
        """
        Writes the information tables to a .json file that can be easily loaded into a Java program.

        If no filepath is given, the tables are written to
        <filenameBase>_Tables.json inside a "DerivedFiles" folder beside the
        source XML file (the folder is created if it doesn't exist yet).

        See http://en.wikipedia.org/wiki/JSON.
        """
        import json

        assert (self._XMLtree)
        self.importDataToPython()
        assert (self.__dataDicts)

        if not filepath:
            folder = os.path.join(
                os.path.split(self.__XMLFilepath)[0], "DerivedFiles")
            # Make sure the default output folder is there (as pickle() does)
            os.makedirs(folder, exist_ok=True)
            filepath = os.path.join(folder,
                                    self._filenameBase + "_Tables.json")
        if Globals.verbosityLevel > 1:
            print(_("Exporting to {}...").format(filepath))
        with open(filepath, 'wt') as myFile:
            # No title/version/date header is written here (unlike the other
            #   exporters) because JSON has no comment syntax
            json.dump(self.__dataDicts, myFile, indent=2)

    # end of exportDataToJSON

    def exportDataToC(self, filepath=None):
        """
        Writes the information tables to a .h file that can be included in c and c++ programs.

        NOT IMPLEMENTED YET: the method raises an Exception straight away,
        so everything after the raise statement is unreachable draft code.
        """
        raise Exception("C export not written yet")

        # ---- Unreachable draft code from here to the end of the method ----

        def exportPythonDict(theFile, theDict, dictName, structName,
                             fieldsComment):
            """Exports theDict to theFile as a static array of C structs."""
            def convertEntry(entry):
                """Convert the fields of an entry into a comma-separated string of C literals."""
                result = ""
                for field in entry:
                    if result: result += ", "  # Separate the fields
                    if field is None: result += '""'  # None becomes an empty C string
                    elif isinstance(field, str):
                        # Quote the string and escape any embedded double quotes
                        result += '"' + str(field).replace('"', '\\"') + '"'
                    elif isinstance(field, int):
                        result += str(field)
                    else:
                        logging.error(
                            _("Cannot convert unknown field type '{}' in entry '{}'"
                              ).format(field, entry))
                return result

            # NOTE(review): the format strings below contain single literal
            #   braces -- str.format() presumably needs them doubled ({{ and }})
            #   before this code can run -- confirm when finishing this method
            theFile.write(
                "static struct {} {}[] = {\n  // Fields are {}\n".format(
                    structName, dictName, fieldsComment))
            for entry in sorted(theDict.keys()):
                if isinstance(entry, str):
                    theFile.write("  {\"{}\", {}},\n".format(
                        entry, convertEntry(theDict[entry])))
                elif isinstance(entry, int):
                    theFile.write("  {{}, {}},\n".format(
                        entry, convertEntry(theDict[entry])))
                else:
                    logging.error(
                        _("Can't handle this type of data yet: {}").format(
                            entry))
            theFile.write("}; // {}\n\n".format(dictName))

        # end of exportPythonDict

        assert (self._XMLtree)
        self.importDataToPython()
        assert (self.__dataDicts)

        if not filepath:
            filepath = os.path.join(
                os.path.split(self.__XMLFilepath)[0], "DerivedFiles",
                self._filenameBase + "_Tables.h")
        if Globals.verbosityLevel > 1:
            print(_("Exporting to {}...").format(filepath))

        # NOTE(review): the visible end of importDataToPython() returns a tuple of
        #   three dicts, so this six-way unpack looks like a leftover from
        #   another converter -- confirm before enabling this code
        IDDict, RADict, SBLDict, OADict, PADict, PNDict = self.importDataToPython(
        )
        ifdefName = self._filenameBase.upper() + "_Tables_h"  # Name for the include guard
        with open(filepath, 'wt') as myFile:
            myFile.write("// {}\n//\n".format(filepath))
            myFile.write(
                "// This UTF-8 file was automatically generated by BibleOrganizationalSystemsConverter.py V{} on {}\n//\n"
                .format(ProgVersion, datetime.now()))
            if self.title: myFile.write("// {}\n".format(self.title))
            if self.version:
                myFile.write("//  Version: {}\n".format(self.version))
            if self.date: myFile.write("//  Date: {}\n//\n".format(self.date))
            myFile.write(
                "//   {} {} loaded from the original XML file.\n//\n\n".format(
                    len(self._XMLtree), self._treeTag))
            # Open the include guard
            myFile.write("#ifndef {}\n#define {}\n\n".format(
                ifdefName, ifdefName))
            # One table per lookup key
            exportPythonDict(
                myFile, IDDict, "IDDict",
                "{int id; char* refAbbrev; char* SBLAbbrev; char* OSISAbbrev; char* PTAbbrev; char* PTNum; char* EngName;}",
                "id (sorted), referenceAbbreviation, SBLAbbreviation, OSISAbbreviation, ParatextAbbreviation, ParatextNumberString, nameEnglish (comment only)"
            )
            exportPythonDict(
                myFile, RADict, "RADict",
                "{char* refAbbrev; int id; char* SBLAbbrev; char* OSISAbbrev; char* PTAbbrev; char* PTNum; char* EngName;}",
                "referenceAbbreviation (sorted), SBLAbbreviation, OSISAbbreviation, ParatextAbbreviation, ParatextNumberString, id, nameEnglish (comment only)"
            )
            exportPythonDict(
                myFile, SBLDict, "SBLDict",
                "{char* SBLAbbrev; int id; char* refAbbrev; char* OSISAbbrev; char* PTAbbrev; char* PTNum; char* EngName;}",
                "SBLAbbreviation (sorted), ReferenceAbbreviation, OSISAbbreviation, ParatextAbbreviation, ParatextNumberString, id, nameEnglish (comment only)"
            )
            exportPythonDict(
                myFile, OADict, "OADict",
                "{char* OSISAbbrev; int id; char* refAbbrev; char* SBLAbbrev; char* PTAbbrev; char* PTNum; char* EngName;}",
                "OSISAbbreviation (sorted), ReferenceAbbreviation, SBLAbbreviation, ParatextAbbreviation, ParatextNumberString, id, nameEnglish (comment only)"
            )
            exportPythonDict(
                myFile, PADict, "PADict",
                "{char* PTAbbrev; int id; char* refAbbrev; char* SBLAbbrev; char* OSISAbbrev; char* PTNum; char* EngName;}",
                "ParatextAbbreviation (sorted), referenceAbbreviation, SBLAbbreviation, OSISAbbreviation, ParatextNumberString, id, nameEnglish (comment only)"
            )
            exportPythonDict(
                myFile, PNDict, "PNDict",
                "{char* PTNum; int id; char* PTAbbrev; char* refAbbrev; char* SBLAbbrev; char* OSISAbbrev; char* EngName;}",
                "ParatextNumberString (sorted), ParatextAbbreviation, referenceAbbreviation, SBLAbbreviation, OSISAbbreviation, id, nameEnglish (comment only)"
            )
            # Close the include guard
            myFile.write("#endif // {}\n".format(ifdefName))

Exemplo n.º 9

0

Exibir arquivo

class HaggaiXMLBible( Bible ):
    """
    Class for reading, validating, and converting HaggaiXMLBible XML.
    """
    XMLNameSpace = "{http://www.w3.org/2001/XMLSchema-instance}"
    treeTag = 'XMLBIBLE'
    infoTag = 'INFORMATION'
    bookTag = 'BIBLEBOOK'
    chapterTag = 'CHAPTER'
    captionTag = 'CAPTION'
    paragraphTag = 'PARAGRAPH'
    verseTag = 'VERSE'
    noteTag = 'NOTE'
    styleTag = 'STYLE'
    breakTag = 'BR'


    def __init__( self, sourceFolder, givenName, encoding='utf-8' ):
        """
        Constructor: prepares an (as yet unloaded) Haggai XML Bible object.

        sourceFolder and givenName are joined to give the path of the XML
            file which will be read later by load().
        encoding is simply remembered on the object.

        Nothing is parsed here -- if the file can't be read, a message is
            printed but no exception is raised.
        """
        # The base class must be set up before anything else
        Bible.__init__( self )
        self.objectNameString = 'Haggai XML Bible object'
        self.objectTypeString = 'Haggai'

        # Remember what we were given, and work out where the file is
        self.sourceFolder = sourceFolder
        self.givenName = givenName
        self.encoding = encoding
        self.sourceFilepath = os.path.join( sourceFolder, givenName )

        # These will hold the XML data once it's loaded
        self.tree = None
        self.header = None

        # Data tables needed for checking the book names
        self.genericBOS = BibleOrganizationalSystem( 'GENERIC-KJV-66-ENG' )

        # Give an early hint if the file isn't going to be loadable
        fileIsReadable = os.access( self.sourceFilepath, os.R_OK )
        if not fileIsReadable:
            print( "HaggaiXMLBible: File {!r} is unreadable".format( self.sourceFilepath ) )

        # Use the filename as the name (load() may replace it later)
        self.name = givenName
    # end of HaggaiXMLBible.__init__


    def load( self ):
        """
        Load a single source XML file and load book elements.

        Parses self.sourceFilepath, copies the recognised attributes of the
            main element onto this object, detaches and processes the
            INFORMATION header record (whether it's the first or the last
            subelement), and then validates and extracts each book element.

        If the file can't be parsed, a critical error is logged and the
            method returns without attempting to process anything.
        """
        if BibleOrgSysGlobals.verbosityLevel > 2: print( _("Loading {}…").format( self.sourceFilepath ) )
        try: self.tree = ElementTree().parse( self.sourceFilepath )
        except ParseError as err:
            logging.critical( exp("Loader parse error in xml file {}: {} {}").format( self.givenName, sys.exc_info()[0], err ) )
            return # Nothing was loaded, so there's no tree to process below
        if BibleOrgSysGlobals.debugFlag: assert len ( self.tree ) # Fail here if we didn't load anything at all

        # Find the main (bible) container
        if self.tree.tag == HaggaiXMLBible.treeTag:
            location = "Haggai XML file"
            BibleOrgSysGlobals.checkXMLNoText( self.tree, location, '4f6h' )
            BibleOrgSysGlobals.checkXMLNoTail( self.tree, location, '1wk8' )

            # These attributes are recognised (so no warning is given) but their values aren't used
            #   (In italian.xml the lgid is set to "german")
            unusedAttributes = ( HaggaiXMLBible.XMLNameSpace + 'noNamespaceSchemaLocation', 'lgid', 'type' )
            name = status = revision = version = None
            for attrib,value in self.tree.items():
                if attrib == "biblename":
                    name = value
                elif attrib == "status":
                    status = value
                elif attrib == "revision":
                    revision = value
                elif attrib == 'version':
                    version = value
                elif attrib in unusedAttributes:
                    pass
                else: logging.warning( "Unprocessed {!r} attribute ({}) in main element".format( attrib, value ) )
            if name: self.name = name
            if status: self.status = status
            if revision: self.revision = revision
            if version: self.version = version

            # The information record is normally the first subelement,
            #   but some files have it at the END of the file
            if len( self.tree ): # Don't try to index into a container with no subelements
                for candidate in ( self.tree[0], self.tree[-1] ):
                    if candidate.tag == 'INFORMATION':
                        self.header = candidate
                        self.tree.remove( self.header ) # So that only books remain in the tree
                        self.__validateAndExtractHeader()
                        break

            # Find the submain (book) containers
            for element in self.tree:
                if element.tag == HaggaiXMLBible.bookTag:
                    sublocation = "book in " + location
                    BibleOrgSysGlobals.checkXMLNoText( element, sublocation, 'g3g5' )
                    BibleOrgSysGlobals.checkXMLNoTail( element, sublocation, 'd3f6' )
                    self.__validateAndExtractBook( element )
                else: logging.error( "Expected to find {!r} but got {!r}".format( HaggaiXMLBible.bookTag, element.tag ) )
        else: logging.error( "Expected to load {!r} but got {!r}".format( HaggaiXMLBible.treeTag, self.tree.tag ) )
        self.doPostLoadProcessing()
    # end of HaggaiXMLBible.load


    def __validateAndExtractHeader( self ):
        """
        Extracts information out of the header record, such as:
            <INFORMATION>
            <title>King James Version</title>
            <creator></creator>
            <subject>The Holy Bible</subject>
            <description>In 1604, King James I of England authorized that a new translation of the Bible into English be started. ...</description>
            <publisher>FREE BIBLE SOFTWARE GROUP</publisher>
            <contributors />
            <date>2009-01-23</date>
            <type>Bible</type>
            <format>Haggai XML Bible Markup Language</format>
            <identifier>kjv</identifier>
            <source>http://www.unboundbible.com/zips/index.cfm?lang=English</source>
            <language>ENG</language>
            <coverage>provide the Bible to the nations of the world</coverage>
            <rights>We believe that this Bible is found in the Public Domain.</rights>
        </INFORMATION>

        Each recognised subelement is checked for having no tail, no
            attributes and no subelements, and then its text is stored on
            this object.  Unrecognised subelements are logged as errors.

        Multiple contributor subelements are accumulated into one flat list
            (a single contributor is stored as a plain string).
        """
        if BibleOrgSysGlobals.debugFlag: assert self.header
        location = 'Header'
        BibleOrgSysGlobals.checkXMLNoAttributes( self.header, location, 'j4j6' )
        BibleOrgSysGlobals.checkXMLNoText( self.header, location, 'sk4l' )
        BibleOrgSysGlobals.checkXMLNoTail( self.header, location, 'a2d4' )

        # Maps each tag to the name of the attribute that its text is stored in
        # These ones are always stored (and are asserted to have text when debugging)
        mandatoryFields = { 'title':'title', 'description':'description', 'date':'date',
                            'identifier':'identifier', 'source':'source', 'language':'language' }
        # These ones are only stored if they actually contain some text
        optionalFields = { 'creator':'creator', 'subject':'subject', 'publisher':'publisher',
                           'contributors':'contributors', 'type':'documentType',
                           'coverage':'coverage', 'rights':'rights' }
        # These ones need their own handling below
        specialTags = ( 'contributor', 'format' )

        # TODO: We probably need to rationalise some of the self.xxx stores
        for element in self.header:
            tag = element.tag
            if tag not in mandatoryFields and tag not in optionalFields and tag not in specialTags:
                logging.error( "Found unexpected {!r} tag in {}".format( tag, location ) )
                continue

            # The structural checks are the same for all of the known tags
            #   (only the contributor tag has its own set of error codes)
            sublocation = "{} in {}".format( tag, location )
            if tag == 'contributor': tailCode, attributesCode, subelementsCode = 'alj1d', 'j3jjd', '5gk78'
            else: tailCode, attributesCode, subelementsCode = 'al1d', 'j3jd', '5g78'
            BibleOrgSysGlobals.checkXMLNoTail( element, sublocation, tailCode )
            BibleOrgSysGlobals.checkXMLNoAttributes( element, sublocation, attributesCode )
            BibleOrgSysGlobals.checkXMLNoSubelements( element, sublocation, subelementsCode )

            if tag in mandatoryFields:
                if BibleOrgSysGlobals.debugFlag: assert element.text
                setattr( self, mandatoryFields[tag], element.text )
            elif tag in optionalFields:
                if element.text: setattr( self, optionalFields[tag], element.text )
            elif tag == 'format': # Only checked -- never stored
                if BibleOrgSysGlobals.debugFlag: assert element.text
                if BibleOrgSysGlobals.debugFlag: assert element.text == 'Haggai XML Bible Markup Language'
            elif element.text: # Must be a contributor
                try: previous = self.contributor
                except AttributeError: self.contributor = element.text # Must be the first (and possibly only) one
                else:
                    # Put multiples into a list, extending an existing list
                    #   so that a third contributor doesn't create a nested list
                    if isinstance( previous, list ): previous.append( element.text )
                    else: self.contributor = [ previous, element.text ]
    # end of HaggaiXMLBible.__validateAndExtractHeader


    def __validateAndExtractBook( self, book ):
        """
        Validate the given XML book element and extract its contents.

        The book is identified from its bnumber attribute if there is one,
            or else from its bname attribute.  If it can't be identified,
            nothing is saved.

        Captions are saved as 'mt' lines and chapter subelements are handed
            on for processing, then the finished book is stashed.
        """
        if BibleOrgSysGlobals.verbosityLevel > 3: print( _("Validating XML book…") )

        # Collect the book attributes that we know about
        bookNumber = bookName = bookShortName = None
        for attrib,value in book.items():
            if attrib == "bnumber": bookNumber = value
            elif attrib == "bname": bookName = value
            elif attrib == "bsname": bookShortName = value
            else: logging.warning( "Unprocessed {!r} attribute ({}) in book element".format( attrib, value ) )

        # Work out which book this is -- the number is preferred over the name
        BBB = None
        if bookNumber:
            try: BBB = BibleOrgSysGlobals.BibleBooksCodes.getBBBFromReferenceNumber( bookNumber )
            except KeyError:
                warningText = "Unable to deduce which book is number={}, name={}, shortName={} -- ignoring it"
                logging.warning( warningText.format( bookNumber, bookName, bookShortName ) )
        elif bookName:
            BBB = self.genericBOS.getBBBFromText( bookName )

        if not BBB: return # We couldn't identify the book so there's nothing to save

        if BibleOrgSysGlobals.verbosityLevel > 2: print( _("Validating {} {}…").format( BBB, bookName ) )
        thisBook = BibleBook( self, BBB )
        thisBook.objectNameString = 'Haggai XML Bible Book object'
        thisBook.objectTypeString = 'Haggai'

        for subelement in book:
            subTag = subelement.tag
            if subTag == HaggaiXMLBible.captionTag: # Saved as a main title
                sublocation = "caption in {}".format( BBB )
                BibleOrgSysGlobals.checkXMLNoAttributes( subelement, sublocation, 'jhl6' )
                BibleOrgSysGlobals.checkXMLNoSubelements( subelement, sublocation, 'jk21' )
                BibleOrgSysGlobals.checkXMLNoTail( subelement, sublocation, 'kjh6' )
                thisBook.addLine( 'mt', subelement.text )
            elif subTag == HaggaiXMLBible.chapterTag:
                sublocation = "chapter in {}".format( BBB )
                BibleOrgSysGlobals.checkXMLNoText( subelement, sublocation, 'j3jd' )
                BibleOrgSysGlobals.checkXMLNoTail( subelement, sublocation, 'al1d' )
                self.__validateAndExtractChapter( BBB, thisBook, subelement )
            else:
                logging.error( "Expected to find {!r} but got {!r}".format( HaggaiXMLBible.chapterTag, subTag ) )

        if BibleOrgSysGlobals.verbosityLevel > 2: print( "  Saving {} into results…".format( BBB ) )
        self.stashBook( thisBook )
    # end of HaggaiXMLBible.__validateAndExtractBook


    def __validateAndExtractChapter( self, BBB, thisBook, chapter ):
        """
        Check/validate and extract chapter data from the given XML chapter record
            finding and saving the chapter number (from the cnumber attribute) and
            handing on the paragraph subelements for processing.

        BBB is the book code (only used here for messages and locations).
        thisBook is the book object that the extracted lines are added to.
        chapter is the XML chapter element.

        NOTE(review): The verse and caption branches below test for tags with
            'disabled' appended, so they can't match ordinary VERSE or CAPTION
            subelements (which end up being logged as errors) -- presumably
            they were switched off deliberately -- confirm.
        """

        if BibleOrgSysGlobals.verbosityLevel > 3: print( _("Validating XML chapter…") )

        # Process the chapter attributes first
        chapterNumber = None
        for attrib,value in chapter.items():
            if attrib=="cnumber":
                chapterNumber = value
            else: logging.warning( "Unprocessed {!r} attribute ({}) in chapter element".format( attrib, value ) )
        if chapterNumber:
            thisBook.addLine( 'c', chapterNumber )
        else: logging.error( "Missing 'cnumber' attribute in chapter element for {}".format( BBB ) ) # Name the attribute that's actually read above

        for element in chapter:
            if element.tag == HaggaiXMLBible.paragraphTag:
                self.__validateAndExtractParagraph( BBB, chapterNumber, thisBook, element )
            elif element.tag == HaggaiXMLBible.verseTag+'disabled':
                self.__validateAndExtractVerse( BBB, chapterNumber, thisBook, element )
            elif element.tag == HaggaiXMLBible.captionTag+'disabled': # Used in Psalms
                location = "caption in {} {}".format( BBB, chapterNumber )
                BibleOrgSysGlobals.checkXMLNoTail( element, location, 'k5k8' )
                BibleOrgSysGlobals.checkXMLNoSubelements( element, location, 'd3f5' )
                # Handle caption attributes
                vRef = None
                for attrib,value in element.items():
                    if attrib=="vref":
                        vRef = value
                        if BibleOrgSysGlobals.debugFlag: assert vRef == '1'
                    else: logging.warning( "Unprocessed {!r} attribute ({}) in caption element".format( attrib, value ) )
                if BibleOrgSysGlobals.debugFlag: assert vRef
                vText = element.text
                if vText: # This is the main text of the caption
                    thisBook.addLine( 'v', '0' + ' ' + vText ) # We save it as verse zero
                else:
                    logging.warning( "{} {}:{} has no text".format( BBB, chapterNumber, vRef ) )
            else: logging.error( "Expected to find {!r} but got {!r}".format( HaggaiXMLBible.verseTag, element.tag ) )
    # end of HaggaiXMLBible.__validateAndExtractChapter


    def __validateAndExtractParagraph( self, BBB, chapterNumber, thisBook, paragraph ):
        """
        Validate one XML paragraph element and copy its contents into thisBook.

        The paragraph itself must have no attributes, text or tail (each is checked).
        A 'p' line is always added first, then each verse subelement is handed on
            to the verse extractor. Any unrecognised subelement is logged as an error.
        """

        if BibleOrgSysGlobals.verbosityLevel > 3: print( _("Validating XML paragraph…") )

        paragraphLocation = "paragraph in {} {}".format( BBB, chapterNumber )
        BibleOrgSysGlobals.checkXMLNoAttributes( paragraph, paragraphLocation, 'brgw3' )
        BibleOrgSysGlobals.checkXMLNoText( paragraph, paragraphLocation, 'brgw3' )
        BibleOrgSysGlobals.checkXMLNoTail( paragraph, paragraphLocation, 'brgw3' )
        thisBook.addLine( 'p', '' )

        # Work through the subelements (normally verses)
        for child in paragraph:
            if child.tag == HaggaiXMLBible.verseTag:
                self.__validateAndExtractVerse( BBB, chapterNumber, thisBook, child )
                continue
            if child.tag != HaggaiXMLBible.captionTag+'disabled': # i.e., not a (disabled) caption either
                logging.error( "Expected to find {!r} but got {!r}".format( HaggaiXMLBible.verseTag, child.tag ) )
                continue

            # From here on we have a caption (used in Psalms)
            captionLocation = "caption in {} {}".format( BBB, chapterNumber )
            BibleOrgSysGlobals.checkXMLNoTail( child, captionLocation, 'k5k8' )
            BibleOrgSysGlobals.checkXMLNoSubelements( child, captionLocation, 'd3f5' )

            # Handle caption attributes
            vRef = None
            for attrib,value in child.items():
                if attrib != "vref":
                    logging.warning( "Unprocessed {!r} attribute ({}) in caption element".format( attrib, value ) )
                    continue
                vRef = value
                if BibleOrgSysGlobals.debugFlag: assert vRef == '1'
            if BibleOrgSysGlobals.debugFlag: assert vRef

            captionText = child.text
            if captionText: # This is the main text of the caption
                thisBook.addLine( 'v', '0' + ' ' + captionText ) # We save it as verse zero
            else:
                logging.warning( "{} {}:{} has no text".format( BBB, chapterNumber, vRef ) )
    # end of HaggaiXMLBible.__validateAndExtractParagraph


    def __validateAndExtractVerse( self, BBB, chapterNumber, thisBook, verse ):
        """
        Check/validate and extract verse data from the given XML verse element
            finding and saving the verse text.

        The verse's own text and the text of its note, style and line-break
            subelements are accumulated into one string (vText), which is saved
            with thisBook.addLine( 'v', ... ) prefixed by the verse number.
        A line-break subelement saves the text collected so far and then adds an 'm' line.

        BBB and chapterNumber are only used here for locations and messages.
        """

        if BibleOrgSysGlobals.verbosityLevel > 3: print( _("Validating XML verse…") )

        location = "verse in {} {}".format( BBB, chapterNumber )
        BibleOrgSysGlobals.checkXMLNoTail( verse, location, 'l5ks' )

        # Handle verse attributes
        # NOTE(review): toVerseNumber is assigned here but never used in this method
        verseNumber = toVerseNumber = None
        for attrib,value in verse.items():
            if attrib=="vnumber":
                verseNumber = value
            else: logging.warning( "Unprocessed {!r} attribute ({}) in verse element".format( attrib, value ) )
        if BibleOrgSysGlobals.debugFlag: assert verseNumber
        location = "{}:{}".format( location, verseNumber ) # Get a better location description
        #thisBook.addLine( 'v', verseNumber )
        vText = '' if verse.text is None else verse.text
        if vText: vText = vText.strip()
        #if not vText: # This happens if a verse starts immediately with a style or note
            #logging.warning( "{} {}:{} has no text".format( BBB, chapterNumber, verseNumber ) )

        # Handle verse subelements (notes and styled portions)
        for subelement in verse:
            if subelement.tag == HaggaiXMLBible.noteTag:
                sublocation = "note in " + location
                noteType = None
                for attrib,value in subelement.items():
                    if attrib=="type": noteType = value
                    else: logging.warning( "Unprocessed {!r} attribute ({}) in style subelement".format( attrib, value ) )
                if noteType and noteType not in ('variant',):
                    logging.warning( "Unexpected {} note type in {}".format( noteType, BBB ) )
                nText, nTail = subelement.text, subelement.tail
                #print( "note", BBB, chapterNumber, verseNumber, noteType, repr(nText), repr(nTail) )
                # The note is appended between \f and \f* markers (with a \fk field only if it has a type)
                # NOTE(review): nText is formatted as-is, so a note with no text gives the string 'None' -- confirm
                vText += "\\f + \\fk {} \\ft {}\\f*".format( noteType, nText ) if noteType else "\\f + \\ft {}\\f*".format( nText )
                # The note's tail is appended before the nested style subelements (below) are processed
                if nTail:
                    if '\n' in nTail:
                        print( "HaggaiXMLBible.__validateAndExtractVerse: nTail {} {}:{} {!r}".format( BBB, chapterNumber, verseNumber, nTail ) )
                        nTail = nTail.replace( '\n', ' ' )
                    vText += nTail
                for subsubelement in subelement:
                    if subsubelement.tag == HaggaiXMLBible.styleTag:
                        subsublocation = "style in " + sublocation
                        BibleOrgSysGlobals.checkXMLNoSubelements( subsubelement, subsublocation, 'fyt4' )
                        # Only the 'fs' attribute is read now, so css and idStyle always stay None
                        fs = css = idStyle = None
                        for attrib,value in subsubelement.items():
                            if attrib=='fs': fs = value
                            #elif attrib=="css": css = value
                            #elif attrib=="id": idStyle = value
                            else: logging.warning( "Unprocessed {!r} attribute ({}) in style subsubelement".format( attrib, value ) )
                        if BibleOrgSysGlobals.debugFlag: assert fs or css or idStyle
                        SFM = None
                        if fs == 'italic': SFM = '\\it'
                        elif fs == 'super': SFM = '\\bdit'
                        elif fs == 'emphasis': SFM = '\\em'
                        # NOTE(review): 'halt' is not defined in this method -- presumably a deliberately
                        #   undefined name that raises NameError to stop on an unknown style; confirm
                        else: print( "fs is", fs, "css is", css, "idStyle is", idStyle ); halt
                        #if css == "font-style:italic": SFM = '\\it'
                        #elif css == "font-style:italic;font-weight:bold": SFM = '\\bdit'
                        #elif css == "color:#FF0000": SFM = '\\em'
                        #elif css == "font-size: x-small; color:#8B8378": SFM = '\\add'
                        #elif css is None and idStyle=='cl:divineName': SFM = '\\nd'
                        #else: print( "css is", css, "idStyle is", idStyle ); halt
                        # NOTE(review): .strip() is called directly on .text, which fails if the style element has no text
                        sText, sTail = subsubelement.text.strip(), subsubelement.tail
                        if BibleOrgSysGlobals.debugFlag: assert sText
                        if SFM: vText += SFM+' ' + sText + SFM+'*'
                        else: vText += '\\sc ' + '['+css+']' + sText + '\\sc* ' # Use sc for unknown styles
                        if sTail: vText += sTail.strip()
                    else: logging.error( "Expected to find {} but got {!r} in {}".format( HaggaiXMLBible.styleTag, subsubelement.tag, sublocation ) )

            elif subelement.tag == HaggaiXMLBible.styleTag:
                sublocation = "style in " + location
                BibleOrgSysGlobals.checkXMLNoSubelements( subelement, sublocation, 'f5gh' )
                # Only the 'fs' attribute is read now, so css and idStyle always stay None
                fs = css = idStyle = None
                for attrib,value in subelement.items():
                    if attrib=="fs": fs = value
                    #elif attrib=="css": css = value
                    #elif attrib=="id": idStyle = value
                    else: logging.warning( "Unprocessed {!r} attribute ({}) in style subelement".format( attrib, value ) )
                if BibleOrgSysGlobals.debugFlag: assert fs
                SFM = None
                # Unlike the note-internal styles above, 'italic' is not accepted here
                if fs == 'super': SFM = '\\bdit'
                elif fs == 'emphasis': SFM = '\\em'
                # NOTE(review): 'halt' is not defined in this method -- presumably a deliberate stop; confirm
                else: print( "fs is", fs, "css is", css, "idStyle is", idStyle ); halt
                #if css == "font-style:italic": SFM = '\\it'
                #elif css == "font-style:italic;font-weight:bold": SFM = '\\bdit'
                #elif css == "color:#FF0000": SFM = '\\em'
                #elif css == "font-size: x-small; color:#8B8378": SFM = '\\add'
                #elif css is None and idStyle=='cl:divineName': SFM = '\\nd'
                #else: print( "css is", css, "idStyle is", idStyle ); halt
                sText, sTail = subelement.text.strip(), subelement.tail
                if BibleOrgSysGlobals.debugFlag: assert sText
                #print( BBB, chapterNumber, sublocation )
                if SFM: vText += SFM+' ' + sText + SFM+'*'
                else: vText += '\\sc ' + '['+css+']' + sText + '\\sc* ' # Use sc for unknown styles
                if sTail: vText += sTail.strip()

            elif subelement.tag == HaggaiXMLBible.breakTag:
                sublocation = "line break in " + location
                BibleOrgSysGlobals.checkXMLNoText( subelement, sublocation, 'c1d4' )
                BibleOrgSysGlobals.checkXMLNoSubelements( subelement, sublocation, 'g4g8' )
                art = None
                for attrib,value in subelement.items():
                    if attrib=="art":
                        art = value
                    else: logging.warning( "Unprocessed {!r} attribute ({}) in style subelement".format( attrib, value ) )
                if BibleOrgSysGlobals.debugFlag: assert art == 'x-nl'
                #print( BBB, chapterNumber, verseNumber )
                #assert vText
                # Save what we have so far as the verse line, then put the text after the break into an 'm' line
                # NOTE(review): verseNumber is None from here on, so the concatenations with verseNumber
                #   (here for a second break, or at the end of the method) would fail if more text is collected -- confirm
                if vText:
                    thisBook.addLine( 'v', verseNumber + ' ' + vText ); verseNumber = None
                    vText = ''
                thisBook.addLine( 'm', subelement.tail.strip() if subelement.tail else '' )
                #bTail = subelement.tail
                #if bTail: vText = bTail.strip()
            else: logging.error( "Expected to find NOTE or STYLE but got {!r} in {}".format( subelement.tag, location ) )

        if vText: # This is the main text of the verse (follows the verse milestone)
            if '\n' in vText:
                print( "HaggaiXMLBible.__validateAndExtractVerse: vText {} {}:{} {!r}".format( BBB, chapterNumber, verseNumber, vText ) )
                vText = vText.replace( '\n', ' ' )
            thisBook.addLine( 'v', verseNumber + ' ' + vText ); verseNumber = None

Exemplo n.º 10

0

Exibir arquivo

Arquivo: USFM3MarkersConverter.py Projeto: pkabore/BibleOrgSys

class USFM3MarkersConverter:
    """
    Class for reading, validating, and converting USFM3Markers.
    This is only intended as a transitory class (used at start-up).
    The USFM3Markers class has functions more generally useful.
    """
    def __init__(
        self
    ) -> None:  # We can't give this parameters because of the singleton
        """
        Constructor: expects the filepath of the source XML file.
        Loads (and crudely validates the XML file) into an element tree.
        """
        self._filenameBase = 'USFM3Markers'

        # These fields are used for parsing the XML
        self._treeTag = 'USFM3Markers'
        self._headerTag = 'header'
        self._mainElementTag = 'USFMMarker'

        # These fields are used for automatically checking/validating the XML
        self._compulsoryAttributes = ()
        self._optionalAttributes = ()
        self._uniqueAttributes = self._compulsoryAttributes + self._optionalAttributes
        self._compulsoryElements = (
            'nameEnglish',
            'marker',
            'compulsory',
            'level',
            'highestNumberSuffix',
            'nests',
            'hasContent',
            'printed',
            'closed',
            'occursIn',
            'deprecated',
        )
        self._optionalElements = ('description', )
        #self._uniqueElements = self._compulsoryElements + self.optionalElements
        self._uniqueElements = (
            'nameEnglish',
            'marker',
        )

        # These are fields that we will fill later
        self._XMLheader, self._XMLTree = None, None
        self.__DataDicts = {}  # Used for import
        self.titleString = self.PROGRAM_VERSION = self.dateString = ''

    # end of __init__

    def loadAndValidate(self, XMLFileOrFilepath=None):
        """
        Loads (and crudely validates the XML file) into an element tree.
            Allows the filepath of the source XML file to be specified, otherwise uses the default.
        """
        if self._XMLTree is None:  # We mustn't have already have loaded the data
            if XMLFileOrFilepath is None:
                # XMLFileOrFilepath = BibleOrgSysGlobals.BOS_DATAFILES_FOLDERPATH.joinpath( self._filenameBase + '.xml' ) # Relative to module, not cwd
                import importlib.resources  # From Python 3.7 onwards -- handles zipped resources also
                XMLFileOrFilepath = importlib.resources.open_text(
                    'BibleOrgSys.DataFiles', self._filenameBase + '.xml')

            self.__load(XMLFileOrFilepath)
            if BibleOrgSysGlobals.strictCheckingFlag:
                self.__validate()
        else:  # The data must have been already loaded
            if XMLFileOrFilepath is not None and XMLFileOrFilepath != self.__XMLFileOrFilepath:
                logging.error(
                    _("Bible books codes are already loaded -- your different filepath of {!r} was ignored"
                      ).format(XMLFileOrFilepath))
        return self

    # end of loadAndValidate

    def __load(self, XMLFileOrFilepath):
        """
        Load the source XML file and remove the header from the tree.
        Also, extracts some useful elements from the header element.

        XMLFileOrFilepath is remembered and then handed to ElementTree().parse().
        If the first child of the root is the header element, it's detached from the tree and
            its single 'work' subelement supplies the version, date and title strings.
        Structural problems are reported through logging rather than raised.
        """
        assert XMLFileOrFilepath
        self.__XMLFileOrFilepath = XMLFileOrFilepath
        assert self._XMLTree is None or len(
            self._XMLTree) == 0  # Make sure we're not doing this twice

        vPrint(
            'Info', debuggingThisModule,
            _("Loading USFM3Markers XML file from {!r}…").format(
                self.__XMLFileOrFilepath))
        self._XMLTree = ElementTree().parse(self.__XMLFileOrFilepath)
        # Fail here if we didn't load anything at all
        #   (uses len() because truth-testing an Element is deprecated)
        assert len(self._XMLTree) > 0

        if self._XMLTree.tag == self._treeTag:
            header = self._XMLTree[0]
            if header.tag == self._headerTag:
                # Keep the public name and also fill the field that the constructor created
                self.XMLheader = self._XMLheader = header
                self._XMLTree.remove(header)
                BibleOrgSysGlobals.checkXMLNoText(header, 'header')
                BibleOrgSysGlobals.checkXMLNoTail(header, 'header')
                BibleOrgSysGlobals.checkXMLNoAttributes(header, 'header')
                if len(header) > 1:
                    logging.info(_("Unexpected elements in header"))
                elif len(header) == 0:
                    logging.info(_("Missing work element in header"))
                else:
                    work = header[0]
                    BibleOrgSysGlobals.checkXMLNoText(work, "work in header")
                    BibleOrgSysGlobals.checkXMLNoTail(work, "work in header")
                    BibleOrgSysGlobals.checkXMLNoAttributes(
                        work, "work in header")
                    if work.tag == "work":
                        self.PROGRAM_VERSION = work.find('version').text
                        self.dateString = work.find('date').text
                        self.titleString = work.find('title').text
                    else:
                        logging.warning(_("Missing work element in header"))
            else:
                # Translate the template first and only then fill it in
                logging.warning(
                    _("Missing header element (looking for {!r} tag)").format(
                        self._headerTag))
            if header.tail is not None and header.tail.strip():
                # Report the tail of the element that was actually tested
                logging.error(
                    _("Unexpected {!r} tail data after header").format(
                        header.tail))
        else:
            logging.error(
                _("Expected to load {!r} but got {!r}").format(
                    self._treeTag, self._XMLTree.tag))

    # end of __load

    def __validate(self):
        """
        Check/validate the loaded data.

        Goes through every child of the loaded tree and reports (through logging)
            missing, blank, unexpected or repeated attributes and subelements,
            as well as any unexpected tail text.
        Children that aren't the main element type are reported as unexpected.
        """
        # Use len() because truth-testing an Element is deprecated
        assert self._XMLTree is not None and len(self._XMLTree) > 0

        # One list of already-seen values for each field that must be unique
        uniqueDict = {}
        for elementName in self._uniqueElements:
            uniqueDict["Element_" + elementName] = []
        for attributeName in self._uniqueAttributes:
            uniqueDict["Attribute_" + attributeName] = []

        for j, element in enumerate(self._XMLTree):
            if element.tag == self._mainElementTag:
                BibleOrgSysGlobals.checkXMLNoText(element, element.tag)
                BibleOrgSysGlobals.checkXMLNoTail(element, element.tag)
                if not self._compulsoryAttributes and not self._optionalAttributes:
                    BibleOrgSysGlobals.checkXMLNoAttributes(
                        element, element.tag)
                if not self._compulsoryElements and not self._optionalElements:
                    BibleOrgSysGlobals.checkXMLNoSubelements(
                        element, element.tag)

                # Check compulsory attributes on this main element
                #   (a missing attribute is reported once only, not again as blank)
                for attributeName in self._compulsoryAttributes:
                    attributeValue = element.get(attributeName)
                    if attributeValue is None:
                        logging.error(
                            _("Compulsory {!r} attribute is missing from {} element in record {}"
                              ).format(attributeName, element.tag, j))
                    elif not attributeValue:
                        logging.warning(
                            _("Compulsory {!r} attribute is blank on {} element in record {}"
                              ).format(attributeName, element.tag, j))

                # Check optional attributes on this main element
                for attributeName in self._optionalAttributes:
                    attributeValue = element.get(attributeName)
                    if attributeValue is not None and not attributeValue:
                        logging.warning(
                            _("Optional {!r} attribute is blank on {} element in record {}"
                              ).format(attributeName, element.tag, j))

                # Check for unexpected additional attributes on this main element
                for attributeName, attributeValue in element.items():
                    if attributeName not in self._compulsoryAttributes and attributeName not in self._optionalAttributes:
                        logging.warning(
                            _("Additional {!r} attribute ({!r}) found on {} element in record {}"
                              ).format(attributeName, attributeValue,
                                       element.tag, j))

                # Check the attributes that must contain unique information (in that particular field -- doesn't check across different attributes)
                for attributeName in self._uniqueAttributes:
                    attributeValue = element.get(attributeName)
                    if attributeValue is not None:
                        seenValues = uniqueDict["Attribute_" + attributeName]
                        if attributeValue in seenValues:
                            logging.error(
                                _("Found {!r} data repeated in {!r} field on {} element in record {}"
                                  ).format(attributeValue, attributeName,
                                           element.tag, j))
                        seenValues.append(attributeValue)

                # Get the marker to use as a record ID
                #   (None if it's missing, so that the compulsory element check below can report that)
                markerElement = element.find("marker")
                marker = None if markerElement is None else markerElement.text

                # Check compulsory elements
                for elementName in self._compulsoryElements:
                    foundElement = element.find(elementName)
                    if foundElement is None:
                        logging.error(
                            _("Compulsory {!r} element is missing in record with marker {!r} (record {})"
                              ).format(elementName, marker, j))
                    elif not foundElement.text:
                        logging.warning(
                            _("Compulsory {!r} element is blank in record with marker {!r} (record {})"
                              ).format(elementName, marker, j))

                # Check optional elements
                for elementName in self._optionalElements:
                    foundElement = element.find(elementName)
                    if foundElement is not None and not foundElement.text:
                        logging.warning(
                            _("Optional {!r} element is blank in record with marker {!r} (record {})"
                              ).format(elementName, marker, j))

                # Check for unexpected additional elements
                for subelement in element:
                    if subelement.tag not in self._compulsoryElements and subelement.tag not in self._optionalElements:
                        logging.warning(
                            _("Additional {!r} element ({!r}) found in record with marker {!r} (record {})"
                              ).format(subelement.tag, subelement.text, marker,
                                       j))

                # Check the elements that must contain unique information (in that particular element -- doesn't check across different elements)
                for elementName in self._uniqueElements:
                    foundElement = element.find(elementName)
                    if foundElement is not None:
                        text = foundElement.text
                        seenTexts = uniqueDict["Element_" + elementName]
                        if text in seenTexts:
                            logging.error(
                                _("Found {!r} data repeated in {!r} element in record with marker {!r} (record {})"
                                  ).format(text, elementName, marker, j))
                        seenTexts.append(text)
            else:
                logging.warning(
                    _("Unexpected element: {} in record {}").format(
                        element.tag, j))
            if element.tail is not None and element.tail.strip():
                logging.error(
                    _("Unexpected {!r} tail data after {} element in record {}"
                      ).format(element.tail, element.tag, j))
        if self._XMLTree.tail is not None and self._XMLTree.tail.strip():
            logging.error(
                _("Unexpected {!r} tail data after {} element").format(
                    self._XMLTree.tail, self._XMLTree.tag))

    # end of __validate

    def __str__(self) -> str:
        """
        This method returns the string representation of a Bible book code.

        @return: the name of a Bible object formatted as a string
        @rtype: string
        """
        indent = 2
        result = "USFM3MarkersConverter object"
        if self.titleString:
            result += ('\n' if result else
                       '') + ' ' * indent + _("Title: {}").format(
                           self.titleString)
        if self.PROGRAM_VERSION:
            result += ('\n' if result else
                       '') + ' ' * indent + _("Version: {}").format(
                           self.PROGRAM_VERSION)
        if self.dateString:
            result += ('\n' if result else ''
                       ) + ' ' * indent + _("Date: {}").format(self.dateString)
        if self._XMLTree is not None:
            result += ('\n' if result else '') + ' ' * indent + _(
                "Number of entries = {:,}").format(len(self._XMLTree))
        return result

    # end of __str__

    def __len__(self):
        """ Returns the number of SFM markers loaded. """
        return len(self._XMLTree)

    # end of __len__

    def importDataToPython(self):
        """
        Loads (and pivots) the data (not including the header) into suitable Python containers to use in a Python program.
        (Of course, you can just use the elementTree in self._XMLTree if you prefer.)
        """
        assert self._XMLTree
        if self.__DataDicts:  # We've already done an import/restructuring -- no need to repeat it
            return self.__DataDicts

        # Load and validate entries and create the dictionaries and lists
        # Note that the combined lists include the numbered markers, e.g., s as well as s1, s2, …
        rawMarkerDict, numberedMarkerList, combinedMarkerDict, = {}, [], {}
        conversionDict, backConversionDict = {}, {}
        newlineMarkersList, numberedNewlineMarkersList, combinedNewlineMarkersList = [], [], []
        internalMarkersList, numberedInternalMarkersList, combinedInternalMarkersList = [], [], []
        noteMarkersList, deprecatedMarkersList = [], []
        for element in self._XMLTree:
            # Get the required information out of the tree for this element
            # Start with the compulsory elements
            nameEnglish = element.find(
                'nameEnglish'
            ).text  # This name is really just a comment element
            #dPrint( 'Quiet', debuggingThisModule, "Processing", nameEnglish )
            marker = element.find('marker').text
            if marker.lower() != marker:
                logging.error(
                    _("Marker {!r} should be lower case").format(marker))
            compulsory = element.find('compulsory').text
            if compulsory not in ('Yes', 'No'):
                logging.error(
                    _("Unexpected {!r} compulsory field for marker {!r}").
                    format(compulsory, marker))
            level = element.find('level').text
            compulsoryFlag = compulsory == 'Yes'
            if level == 'Newline':
                newlineMarkersList.append(marker)
                combinedNewlineMarkersList.append(marker)
            elif level == 'Internal':
                internalMarkersList.append(marker)
            elif level == 'Note':
                noteMarkersList.append(marker)
            else:
                logging.error(
                    _("Unexpected {!r} level field for marker {!r}").format(
                        level, marker))
            highestNumberSuffix = element.find('highestNumberSuffix').text
            if highestNumberSuffix not in ('None', '3', '4', '5', '6', '7',
                                           '8', '9'):
                logging.error(
                    _("Unexpected {!r} highestNumberSuffix field for marker {!r}"
                      ).format(highestNumberSuffix, marker))
            numberableFlag = highestNumberSuffix != 'None'
            if numberableFlag and level == 'Character':
                logging.error(
                    _("Unexpected {!r} highestNumberSuffix field for character marker {!r}"
                      ).format(highestNumberSuffix, marker))
            nests = element.find("nests").text
            if nests not in ('Yes', 'No'):
                logging.error(
                    _("Unexpected {!r} nests field for marker {!r}").format(
                        nests, marker))
            nestsFlag = nests == 'Yes'
            hasContent = element.find('hasContent').text
            if hasContent not in ('Always', 'Never', 'Sometimes'):
                logging.error(
                    _("Unexpected {!r} hasContent field for marker {!r}").
                    format(hasContent, marker))
            printed = element.find('printed').text
            if printed not in ('Yes', 'No'):
                logging.error(
                    _("Unexpected {!r} printed field for marker {!r}").format(
                        printed, marker))
            printedFlag = printed == 'Yes'
            closed = element.find('closed').text
            if closed not in ('No', 'Always', 'Self', 'Optional'):
                logging.error(
                    _("Unexpected {!r} closed field for marker {!r}").format(
                        closed, marker))
            occursIn = element.find('occursIn').text
            if occursIn not in ('Header', 'Introduction', 'Numbering', 'Text',
                                'Canonical Text', 'Poetry', 'Text, Poetry',
                                'Acrostic verse', 'Table row', 'Footnote',
                                'Cross-reference', 'Front and back matter'):
                logging.error(
                    _("Unexpected {!r} occursIn field for marker {!r}").format(
                        occursIn, marker))
            deprecated = element.find('deprecated').text
            if deprecated not in ('Yes', 'No'):
                logging.error(
                    _("Unexpected {!r} deprecated field for marker {!r}").
                    format(deprecated, marker))
            deprecatedFlag = deprecated == 'Yes'

            # The optional elements are set to None if they don't exist
            #closed = None if element.find("closed") is None else element.find("closed").text
            #if closed is not None and closed not in ( "No", "Always", "Optional" ): logging.error( _("Unexpected {!r} closed field for marker {!r}").format( closed, marker ) )
            #if level=="Character" and closed is None: logging.error( _("Entry for character marker {!r} doesn't have a \"closed\" field").format( marker ) )
            description = None if element.find(
                'description') is None else element.find('description').text
            if description is not None: assert description

            # Now put it into my dictionaries and lists for easy access
            #   The marker is lowercase by definition
            if 'marker' in self._uniqueElements:
                assert marker not in rawMarkerDict  # Shouldn't be any duplicates
            rawMarkerDict[marker] = {
                'compulsoryFlag': compulsoryFlag,
                'level': level,
                'highestNumberSuffix': highestNumberSuffix,
                'nestsFlag': nestsFlag,
                'hasContent': hasContent,
                'occursIn': occursIn,
                'printedFlag': printedFlag,
                'closed': closed,
                'deprecatedFlag': deprecatedFlag,
                'description': description,
                'nameEnglish': nameEnglish
            }
            combinedMarkerDict[marker] = marker
            if highestNumberSuffix != 'None':  # We have some extra work to do
                if marker.endswith('-s') or marker.endswith('-e'):
                    assert marker in ('qt-s', 'qt-e'
                                      )  # Only ones we know of so far
                    # Numberical suffix can't just be appended to the end of these
                    conversionDict[marker] = f'{marker[:-2]}1{marker[-2:]}'
                else:  # not a milestone start/end marker
                    conversionDict[marker] = marker + '1'
                for suffix in range(
                        1,
                        int(highestNumberSuffix) +
                        1):  # These are the suffix digits that we allow
                    if marker.endswith('-s') or marker.endswith('-e'):
                        # Numberical suffix can't just be appended to the end of these
                        numberedMarker = f'{marker[:-2]}{suffix}{marker[-2:]}'
                        #dPrint( 'Quiet', debuggingThisModule, f"Marker '{marker}' led to '{numberedMarker}'" )
                    else:  # not a milestone start/end marker
                        numberedMarker = marker + str(suffix)
                    backConversionDict[numberedMarker] = marker
                    numberedMarkerList.append(numberedMarker)
                    combinedMarkerDict[numberedMarker] = marker
                    if marker in newlineMarkersList:
                        numberedNewlineMarkersList.append(numberedMarker)
                        combinedNewlineMarkersList.append(numberedMarker)
                    else:
                        numberedInternalMarkersList.append(numberedMarker)
                        combinedInternalMarkersList.append(numberedMarker)
                    if deprecatedFlag:
                        deprecatedMarkersList.append(numberedMarker)
            else:  # it's not numberable
                numberedMarkerList.append(marker)
                if marker in newlineMarkersList:
                    numberedNewlineMarkersList.append(marker)
                else:
                    numberedInternalMarkersList.append(marker)
                if deprecatedFlag: deprecatedMarkersList.append(marker)

        #dPrint( 'Quiet', debuggingThisModule, conversionDict ); vPrint( 'Quiet', debuggingThisModule, backConversionDict )
        #dPrint( 'Quiet', debuggingThisModule, "newlineMarkersList", len(newlineMarkersList), newlineMarkersList )
        #dPrint( 'Quiet', debuggingThisModule, "numberedNewlineMarkersList", len(numberedNewlineMarkersList), numberedNewlineMarkersList )
        #dPrint( 'Quiet', debuggingThisModule, "combinedNewlineMarkersList", len(combinedNewlineMarkersList), combinedNewlineMarkersList )
        #dPrint( 'Quiet', debuggingThisModule, "internalMarkersList", len(internalMarkersList), internalMarkersList )
        #dPrint( 'Quiet', debuggingThisModule, "deprecatedMarkersList", len(deprecatedMarkersList), deprecatedMarkersList )
        self.__DataDicts = {
            "rawMarkerDict": rawMarkerDict,
            "numberedMarkerList": numberedMarkerList,
            "combinedMarkerDict": combinedMarkerDict,
            "conversionDict": conversionDict,
            "backConversionDict": backConversionDict,
            "newlineMarkersList": newlineMarkersList,
            "numberedNewlineMarkersList": numberedNewlineMarkersList,
            "combinedNewlineMarkersList": combinedNewlineMarkersList,
            "internalMarkersList": internalMarkersList,
            "numberedInternalMarkersList": numberedInternalMarkersList,
            "combinedInternalMarkersList": combinedInternalMarkersList,
            "noteMarkersList": noteMarkersList,
            "deprecatedMarkersList": deprecatedMarkersList,
        }
        return self.__DataDicts  # Just delete any of the dictionaries that you don't need

    # end of importDataToPython

    def pickle(self, filepath=None):
        """
        Writes the information tables to a .pickle file that can be easily loaded into a Python3 program.

        If no filepath is given, the file is written as <filenameBase>_Tables.pickle
            in BibleOrgSysGlobals.DEFAULT_WRITEABLE_DERIVED_DATAFILES_FOLDERPATH
            (and that folder is created first if it doesn't exist yet).
        """
        import pickle

        assert self._XMLTree
        self.importDataToPython()
        assert self.__DataDicts

        if not filepath:
            folder = BibleOrgSysGlobals.DEFAULT_WRITEABLE_DERIVED_DATAFILES_FOLDERPATH
            # makedirs( exist_ok=True ) also creates any missing parent folders
            #   and doesn't fail if the folder appears between a check and the create
            os.makedirs(folder, exist_ok=True)
            filepath = os.path.join(folder,
                                    self._filenameBase + '_Tables.pickle')
        vPrint('Quiet', debuggingThisModule,
               _("Exporting to {}…").format(filepath))
        with open(filepath, 'wb') as myFile:
            pickle.dump(self.__DataDicts, myFile)

    # end of pickle

    def exportDataToPython(self, filepath=None):
        """
        Writes the information tables to a .py file that can be cut and pasted into a Python program.

        If no filepath is given, the file is written as <filenameBase>_Tables.py
            in BibleOrgSysGlobals.DEFAULT_WRITEABLE_DERIVED_DATAFILES_FOLDERPATH.
        """
        def countFields(sampleEntry):
            """Returns the number of fields in an entry (a non-container counts as one field)."""
            if isinstance(sampleEntry, (tuple, dict, list)):
                return len(sampleEntry)
            return 1

        # end of countFields

        def exportPythonDict(theFile, theDict, dictName, keyComment,
                             fieldsComment):
            """Exports theDict to theFile (the entries are written sorted by key)."""
            assert isinstance(theDict, dict)
            # We only check the first entry we get -- an empty dict has nothing to count
            fieldsCount = countFields(next(iter(
                theDict.values()))) if theDict else 0
            theFile.write(
                "{} = {{\n  # Key is {}\n  # Fields ({}) are: {}\n".format(
                    dictName, keyComment, fieldsCount, fieldsComment))
            for dictKey in sorted(theDict):
                theFile.write('  {}: {},\n'.format(repr(dictKey),
                                                   repr(theDict[dictKey])))
            theFile.write("}}\n# end of {} ({} entries)\n\n".format(
                dictName, len(theDict)))

        # end of exportPythonDict

        def exportPythonList(theFile, theList, listName, dummy, fieldsComment):
            """
            Exports theList to theFile.

            The dummy parameter is unused -- it keeps the call signature
                the same as for exportPythonDict.
            """
            assert isinstance(theList, list)
            # A list may be empty (e.g., if no markers were flagged), so don't blindly index [0]
            fieldsCount = countFields(theList[0]) if theList else 0
            theFile.write('{} = [\n    # Fields ({}) are: {}\n'.format(
                listName, fieldsCount, fieldsComment))
            for j, entry in enumerate(theList):
                theFile.write('  {}, # {}\n'.format(repr(entry), j))
            # No comma after the closing bracket: in a module-level assignment
            #   it would turn the exported list into a 1-tuple containing the list
            theFile.write("] # end of {} ({} entries)\n\n".format(
                listName, len(theList)))

        # end of exportPythonList

        assert self._XMLTree
        self.importDataToPython()
        assert self.__DataDicts

        if not filepath:
            filepath = BibleOrgSysGlobals.DEFAULT_WRITEABLE_DERIVED_DATAFILES_FOLDERPATH.joinpath(
                self._filenameBase + '_Tables.py')
        vPrint('Quiet', debuggingThisModule,
               _("Exporting to {}…").format(filepath))
        with open(filepath, 'wt', encoding='utf-8') as myFile:
            # Write a header block of comments describing where the data came from
            myFile.write("# {}\n#\n".format(filepath))
            myFile.write(
                "# This UTF-8 file was automatically generated by USFM3Markers.py V{} on {}\n#\n"
                .format(PROGRAM_VERSION, datetime.now()))
            if self.titleString:
                myFile.write("# {} data\n".format(self.titleString))
            if self.PROGRAM_VERSION:
                myFile.write("#  Version: {}\n".format(self.PROGRAM_VERSION))
            if self.dateString:
                myFile.write("#  Date: {}\n#\n".format(self.dateString))
            myFile.write(
                "#   {} {} loaded from the original XML file.\n#\n\n".format(
                    len(self._XMLTree), self._treeTag))

            # For each table: the function to export it with, plus the two comment strings to pass
            dictInfo = {
                "rawMarkerDict":
                (exportPythonDict, "rawMarker (in the original XML order)",
                 "specified"),
                "numberedMarkerList":
                (exportPythonList, "marker", "rawMarker"),
                "combinedMarkerDict":
                (exportPythonDict, "marker", "rawMarker"),
                "conversionDict":
                (exportPythonDict, "rawMarker", "numberedMarker"),
                "backConversionDict": (exportPythonDict, "numberedMarker",
                                       "rawMarker"),
                "newlineMarkersList": (exportPythonList, "", "rawMarker"),
                "numberedNewlineMarkersList": (exportPythonList, "",
                                               "rawMarker"),
                "combinedNewlineMarkersList": (exportPythonList, "",
                                               "rawMarker"),
                "internalMarkersList": (exportPythonList, "", "rawMarker"),
                "numberedInternalMarkersList": (exportPythonList, "",
                                                "rawMarker"),
                "combinedInternalMarkersList": (exportPythonList, "",
                                                "rawMarker"),
                "noteMarkersList": (exportPythonList, "", "rawMarker"),
                "deprecatedMarkersList": (exportPythonList, "", "rawMarker")
            }
            for dictName, dictData in self.__DataDicts.items():
                exportFunction, keyComment, fieldsComment = dictInfo[dictName]
                exportFunction(myFile, dictData, dictName, keyComment,
                               fieldsComment)
            myFile.write("# end of {}".format(os.path.basename(filepath)))

    # end of exportDataToPython

    def exportDataToJSON(self, filepath=None):
        """
        Dumps the information tables into a UTF-8 .json file
            (e.g., so that they can be easily loaded into a Java program).

        If no filepath is given, the file is written as <filenameBase>_Tables.json
            in BibleOrgSysGlobals.DEFAULT_WRITEABLE_DERIVED_DATAFILES_FOLDERPATH.

        See http://en.wikipedia.org/wiki/JSON.
        """
        import json

        # Make sure that the XML has been loaded and restructured into our tables
        assert self._XMLTree
        self.importDataToPython()
        assert self.__DataDicts

        if not filepath:
            defaultFolder = BibleOrgSysGlobals.DEFAULT_WRITEABLE_DERIVED_DATAFILES_FOLDERPATH
            filepath = defaultFolder.joinpath(self._filenameBase +
                                              '_Tables.json')
        exportMessage = _("Exporting to {}…").format(filepath)
        vPrint('Quiet', debuggingThisModule, exportMessage)
        with open(filepath, mode='wt', encoding='utf-8') as jsonFile:
            json.dump(self.__DataDicts,
                      jsonFile,
                      ensure_ascii=False,
                      indent=2)

    # end of exportDataToJSON

    def exportDataToC(self, filepath=None):
        """
        Writes the information tables to a .h and .c files that can be included in c and c++ programs.

        NOTE: The (optional) filepath should not have the file extension specified -- this is added automatically.

        NOTE: This export is not finished: after loading/importing the data,
            this function unconditionally raises an Exception,
            so none of the file-writing code below that point is ever executed.
        """
        def exportPythonDict(hFile, cFile, theDict, dictName, sortedBy,
                             structure):
            """
            Exports theDict to the .h and .c files.

            Writes a typedef struct (built from the semicolon-separated
                declarations in structure) to hFile,
                and a const static array with one row per (sorted) dict key to cFile.
            """
            def convertEntry(entry):
                """
                Convert special characters in an entry…

                Returns the fields of a tuple or dict entry as one comma-separated string
                    (None becomes an empty quoted string, strings are quoted
                    with any double-quotes escaped, and ints are written as is).
                Any other field or entry types are logged as errors.
                """
                result = ""
                if isinstance(entry, tuple):
                    for field in entry:
                        if result: result += ", "  # Separate the fields
                        if field is None: result += '""'
                        elif isinstance(field, str):
                            result += '"' + str(field).replace('"',
                                                               '\\"') + '"'
                        elif isinstance(field, int):
                            result += str(field)
                        else:
                            logging.error(
                                _("Cannot convert unknown field type {!r} in entry {!r}"
                                  ).format(field, entry))
                elif isinstance(entry, dict):
                    # Dict fields are written in sorted key order
                    for key in sorted(entry.keys()):
                        field = entry[key]
                        if result: result += ", "  # Separate the fields
                        if field is None: result += '""'
                        elif isinstance(field, str):
                            result += '"' + str(field).replace('"',
                                                               '\\"') + '"'
                        elif isinstance(field, int):
                            result += str(field)
                        else:
                            logging.error(
                                _("Cannot convert unknown field type {!r} in entry {!r}"
                                  ).format(field, entry))
                else:
                    logging.error(
                        _("Can't handle this type of entry yet: {}").format(
                            repr(entry)))
                return result

            # end of convertEntry

            # NOTE(review): fieldsCount is never assigned if theDict is empty
            for dictKey in theDict.keys():  # Have to iterate this :(
                fieldsCount = len(
                    theDict[dictKey]
                ) + 1  # Add one since we include the key in the count
                break  # We only check the first (random) entry we get

            #hFile.write( "typedef struct {}EntryStruct { {} } {}Entry;\n\n".format( dictName, structure, dictName ) )
            hFile.write("typedef struct {}EntryStruct {{\n".format(dictName))
            for declaration in structure.split(';'):
                adjDeclaration = declaration.strip()
                if adjDeclaration:
                    hFile.write("    {};\n".format(adjDeclaration))
            hFile.write("}} {}Entry;\n\n".format(dictName))

            cFile.write(
                "const static {}Entry\n {}[{}] = {{\n  // Fields ({}) are {}\n  // Sorted by {}\n"
                .format(dictName, dictName, len(theDict), fieldsCount,
                        structure, sortedBy))
            for dictKey in sorted(theDict.keys()):
                if isinstance(dictKey, str):
                    cFile.write("  {{\"{}\", {}}},\n".format(
                        dictKey, convertEntry(theDict[dictKey])))
                elif isinstance(dictKey, int):
                    cFile.write("  {{{}, {}}},\n".format(
                        dictKey, convertEntry(theDict[dictKey])))
                else:
                    logging.error(
                        _("Can't handle this type of key data yet: {}").format(
                            dictKey))
            # NOTE(review): this writes "]};" to close the array -- the "]" looks unintended for C -- TODO confirm
            cFile.write("]}}; // {} ({} entries)\n\n".format(
                dictName, len(theDict)))

        # end of exportPythonDict

        assert self._XMLTree
        self.importDataToPython()
        assert self.__DataDicts

        # Everything after this raise is unreachable (it's kept as a draft of the C export)
        raise Exception("C export not written yet, sorry.")
        if not filepath:
            filepath = BibleOrgSysGlobals.DEFAULT_WRITEABLE_DERIVED_DATAFILES_FOLDERPATH.joinpath(
                self._filenameBase + '_Tables')
        # NOTE(review): the + below needs filepath to be a string,
        #   but the default above is built with joinpath() -- TODO confirm
        hFilepath = filepath + '.h'
        cFilepath = filepath + '.c'
        vPrint('Quiet', debuggingThisModule,
               _("Exporting to {}…").format(
                   cFilepath))  # Don't bother telling them about the .h file
        ifdefName = self._filenameBase.upper() + "_Tables_h"

        with open( hFilepath, 'wt', encoding='utf-8' ) as myHFile, \
             open( cFilepath, 'wt', encoding='utf-8' ) as myCFile:
            # Write the same header block of comments to both files
            myHFile.write("// {}\n//\n".format(hFilepath))
            myCFile.write("// {}\n//\n".format(cFilepath))
            lines = "// This UTF-8 file was automatically generated by USFM3Markers.py V{} on {}\n//\n".format(
                PROGRAM_VERSION, datetime.now())
            myHFile.write(lines)
            myCFile.write(lines)
            if self.titleString:
                lines = "// {} data\n".format(self.titleString)
                myHFile.write(lines)
                myCFile.write(lines)
            if self.PROGRAM_VERSION:
                lines = "//  Version: {}\n".format(self.PROGRAM_VERSION)
                myHFile.write(lines)
                myCFile.write(lines)
            if self.dateString:
                lines = "//  Date: {}\n//\n".format(self.dateString)
                myHFile.write(lines)
                myCFile.write(lines)
            myCFile.write(
                "//   {} {} loaded from the original XML file.\n//\n\n".format(
                    len(self._XMLTree), self._treeTag))
            # The .h file gets an include guard, and the .c file includes the .h file
            myHFile.write("\n#ifndef {}\n#define {}\n\n".format(
                ifdefName, ifdefName))
            myCFile.write('#include "{}"\n\n'.format(
                os.path.basename(hFilepath)))

            CHAR = "const unsigned char"
            BYTE = "const int"
            # For each table name: ( what it's sorted by, the C struct member declarations )
            # NOTE(review): apart from "rawMarkerDict", these table names and struct fields
            #   don't match the tables that importDataToPython returns -- presumably
            #   they're left over from another module -- TODO confirm
            dictInfo = {
                "referenceNumberDict":
                ("referenceNumber (integer 1..255)",
                 "{} referenceNumber; {}* ByzantineAbbreviation; {}* CCELNumberString; {}* NETBibleAbbreviation; {}* OSISAbbreviation; {} ParatextAbbreviation[3+1]; {} ParatextNumberString[2+1]; {}* SBLAbbreviation; {}* SwordAbbreviation; {}* nameEnglish; {}* numExpectedChapters; {}* possibleAlternativeBooks; {} marker[3+1];"
                 .format(BYTE, CHAR, CHAR, CHAR, CHAR, CHAR, CHAR, CHAR, CHAR,
                         CHAR, CHAR, CHAR, CHAR)),
                "rawMarkerDict":
                ("marker",
                 "{} marker[3+1]; {}* ByzantineAbbreviation; {}* CCELNumberString; {} referenceNumber; {}* NETBibleAbbreviation; {}* OSISAbbreviation; {} ParatextAbbreviation[3+1]; {} ParatextNumberString[2+1]; {}* SBLAbbreviation; {}* SwordAbbreviation; {}* nameEnglish; {}* numExpectedChapters; {}* possibleAlternativeBooks;"
                 .format(CHAR, CHAR, CHAR, BYTE, CHAR, CHAR, CHAR, CHAR, CHAR,
                         CHAR, CHAR, CHAR, CHAR)),
                "CCELDict":
                ("CCELNumberString",
                 "{}* CCELNumberString; {} referenceNumber; {} marker[3+1];".
                 format(CHAR, BYTE, CHAR)),
                "SBLDict":
                ("SBLAbbreviation",
                 "{}* SBLAbbreviation; {} referenceNumber; {} marker[3+1];".
                 format(CHAR, BYTE, CHAR)),
                "EnglishNameDict":
                ("nameEnglish",
                 "{}* nameEnglish; {} referenceNumber; {} marker[3+1];".format(
                     CHAR, BYTE, CHAR))
            }

            for dictName, dictData in self.__DataDicts.items():
                exportPythonDict(myHFile, myCFile, dictData, dictName,
                                 dictInfo[dictName][0], dictInfo[dictName][1])

            myHFile.write("#endif // {}\n\n".format(ifdefName))
            myHFile.write("// end of {}".format(os.path.basename(hFilepath)))
            myCFile.write("// end of {}".format(os.path.basename(cFilepath)))

Exemplo n.º 11

0

Exibir arquivo

class USFMMarkersConverter:
    """
    Class for reading, validating, and converting USFMMarkers.
    This is only intended as a transitory class (used at start-up).
    The USFMMarkers class has functions more generally useful.
    """

    def __init__( self ): # We can't give this parameters because of the singleton
        """
        Constructor: expects the filepath of the source XML file.
        Loads (and crudely validates the XML file) into an element tree.
        """
        self._filenameBase = "USFMMarkers"

        # These fields are used for parsing the XML
        self._treeTag = "USFMMarkers"
        self._headerTag = "header"
        self._mainElementTag = "USFMMarker"

        # These fields are used for automatically checking/validating the XML
        self._compulsoryAttributes = ()
        self._optionalAttributes = ()
        self._uniqueAttributes = self._compulsoryAttributes + self._optionalAttributes
        self._compulsoryElements = ( "nameEnglish", "marker", "compulsory", "level", "numberable", "nests", "hasContent", "printed", "closed", "occursIn", "deprecated", )
        self._optionalElements = ( "description", )
        #self._uniqueElements = self._compulsoryElements + self.optionalElements
        self._uniqueElements = ( "nameEnglish", "marker", )

        # These are fields that we will fill later
        self._XMLheader, self._XMLtree = None, None
        self.__DataDicts = {} # Used for import
        self.titleString = self.ProgVersion = self.dateString = ''
    # end of __init__

    def loadAndValidate( self, XMLFilepath=None ):
        """
        Loads (and crudely validates the XML file) into an element tree.
            Allows the filepath of the source XML file to be specified, otherwise uses the default.
        """
        if self._XMLtree is None: # We mustn't have already have loaded the data
            if XMLFilepath is None:
                XMLFilepath = os.path.join( os.path.dirname(__file__), "DataFiles", self._filenameBase + ".xml" ) # Relative to module, not cwd
            self.__load( XMLFilepath )
            if BibleOrgSysGlobals.strictCheckingFlag:
                self.__validate()
        else: # The data must have been already loaded
            if XMLFilepath is not None and XMLFilepath!=self.__XMLFilepath: logging.error( _("Bible books codes are already loaded -- your different filepath of {!r} was ignored").format( XMLFilepath ) )
        return self
    # end of loadAndValidate

    def __load( self, XMLFilepath ):
        """
        Load the source XML file and remove the header from the tree.
        Also, extracts some useful elements from the header element.

        Sets self._XMLtree to the root element of the parsed file.
        If the first element is the header, it's removed from the tree and saved,
            and if it contains a single work element then self.ProgVersion,
            self.dateString, and self.titleString are set from that.

        Problems with the structure of the file are logged rather than raised.
        """
        assert XMLFilepath
        self.__XMLFilepath = XMLFilepath
        assert self._XMLtree is None or len(self._XMLtree)==0 # Make sure we're not doing this twice

        if BibleOrgSysGlobals.verbosityLevel > 2: print( _("Loading USFMMarkers XML file from {!r}…").format( self.__XMLFilepath ) )
        self._XMLtree = ElementTree().parse( self.__XMLFilepath )
        # Fail here if we didn't load anything at all
        #   (Truth-testing an element is deprecated, so we check for child elements explicitly)
        assert self._XMLtree is not None and len(self._XMLtree) > 0

        if self._XMLtree.tag == self._treeTag:
            header = self._XMLtree[0]
            if header.tag == self._headerTag:
                # __init__ declares _XMLheader, but this was only ever saved as XMLheader
                #   so we set both in case anything uses the older name
                self._XMLheader = self.XMLheader = header
                self._XMLtree.remove( header )
                BibleOrgSysGlobals.checkXMLNoText( header, "header" )
                BibleOrgSysGlobals.checkXMLNoTail( header, "header" )
                BibleOrgSysGlobals.checkXMLNoAttributes( header, "header" )
                if len(header)>1:
                    logging.info( _("Unexpected elements in header") )
                elif len(header)==0:
                    logging.info( _("Missing work element in header") )
                else:
                    work = header[0]
                    BibleOrgSysGlobals.checkXMLNoText( work, "work in header" )
                    BibleOrgSysGlobals.checkXMLNoTail( work, "work in header" )
                    BibleOrgSysGlobals.checkXMLNoAttributes( work, "work in header" )
                    if work.tag == "work":
                        self.ProgVersion = work.find('version').text
                        self.dateString = work.find("date").text
                        self.titleString = work.find("title").text
                    else:
                        logging.warning( _("Missing work element in header") )
            else:
                # Format AFTER translating so that the untranslated string is what gets looked up
                logging.warning( _("Missing header element (looking for {!r} tag)").format( self._headerTag ) )
            # NOTE: It's the tail of the header (first) element that we report here
            #   (there's no variable named element in this function)
            if header.tail is not None and header.tail.strip(): logging.error( _("Unexpected {!r} tail data after header").format( header.tail ) )
        else:
            logging.error( _("Expected to load {!r} but got {!r}").format( self._treeTag, self._XMLtree.tag ) )
    # end of __load

    def __validate( self ):
        """
        Check/validate the loaded data.

        Goes through each record in the loaded tree and logs (rather than raises)
            any missing, blank, unexpected, or repeated attributes and elements,
            plus any unexpected tail data.
        """
        # Truth-testing an element is deprecated, so we check for child elements explicitly
        assert self._XMLtree is not None and len(self._XMLtree) > 0

        # One list of already-seen values for each field that has to be unique
        uniqueDict = {}
        for elementName in self._uniqueElements: uniqueDict["Element_"+elementName] = []
        for attributeName in self._uniqueAttributes: uniqueDict["Attribute_"+attributeName] = []

        for j,element in enumerate(self._XMLtree):
            if element.tag == self._mainElementTag:
                BibleOrgSysGlobals.checkXMLNoText( element, element.tag )
                BibleOrgSysGlobals.checkXMLNoTail( element, element.tag )
                if not self._compulsoryAttributes and not self._optionalAttributes: BibleOrgSysGlobals.checkXMLNoAttributes( element, element.tag )
                if not self._compulsoryElements and not self._optionalElements: BibleOrgSysGlobals.checkXMLNoSubelements( element, element.tag )

                # Check compulsory attributes on this main element
                for attributeName in self._compulsoryAttributes:
                    attributeValue = element.get( attributeName )
                    if attributeValue is None:
                        logging.error( _("Compulsory {!r} attribute is missing from {} element in record {}").format( attributeName, element.tag, j ) )
                    elif not attributeValue: # It's there but blank (so a missing one isn't reported twice)
                        logging.warning( _("Compulsory {!r} attribute is blank on {} element in record {}").format( attributeName, element.tag, j ) )

                # Check optional attributes on this main element
                for attributeName in self._optionalAttributes:
                    attributeValue = element.get( attributeName )
                    if attributeValue is not None and not attributeValue:
                        logging.warning( _("Optional {!r} attribute is blank on {} element in record {}").format( attributeName, element.tag, j ) )

                # Check for unexpected additional attributes on this main element
                for attributeName,attributeValue in element.items():
                    if attributeName not in self._compulsoryAttributes and attributeName not in self._optionalAttributes:
                        logging.warning( _("Additional {!r} attribute ({!r}) found on {} element in record {}").format( attributeName, attributeValue, element.tag, j ) )

                # Check the attributes that must contain unique information (in that particular field -- doesn't check across different attributes)
                for attributeName in self._uniqueAttributes:
                    attributeValue = element.get( attributeName )
                    if attributeValue is not None:
                        if attributeValue in uniqueDict["Attribute_"+attributeName]:
                            logging.error( _("Found {!r} data repeated in {!r} field on {} element in record {}").format( attributeValue, attributeName, element.tag, j ) )
                        uniqueDict["Attribute_"+attributeName].append( attributeValue )

                # Get the marker to use as a record ID
                #   (If it's missing, we use None so that the checks below can still report the problem)
                markerElement = element.find( "marker" )
                marker = None if markerElement is None else markerElement.text

                # Check compulsory elements
                for elementName in self._compulsoryElements:
                    foundElement = element.find( elementName )
                    if foundElement is None:
                        logging.error( _("Compulsory {!r} element is missing in record with marker {!r} (record {})").format( elementName, marker, j ) )
                    elif not foundElement.text:
                        logging.warning( _("Compulsory {!r} element is blank in record with marker {!r} (record {})").format( elementName, marker, j ) )

                # Check optional elements
                for elementName in self._optionalElements:
                    foundElement = element.find( elementName )
                    if foundElement is not None and not foundElement.text:
                        logging.warning( _("Optional {!r} element is blank in record with marker {!r} (record {})").format( elementName, marker, j ) )

                # Check for unexpected additional elements
                for subelement in element:
                    if subelement.tag not in self._compulsoryElements and subelement.tag not in self._optionalElements:
                        logging.warning( _("Additional {!r} element ({!r}) found in record with marker {!r} (record {})").format( subelement.tag, subelement.text, marker, j ) )

                # Check the elements that must contain unique information (in that particular element -- doesn't check across different elements)
                for elementName in self._uniqueElements:
                    foundElement = element.find( elementName )
                    if foundElement is not None:
                        text = foundElement.text
                        if text in uniqueDict["Element_"+elementName]:
                            logging.error( _("Found {!r} data repeated in {!r} element in record with marker {!r} (record {})").format( text, elementName, marker, j ) )
                        uniqueDict["Element_"+elementName].append( text )
            else:
                logging.warning( _("Unexpected element: {} in record {}").format( element.tag, j ) )
            if element.tail is not None and element.tail.strip(): logging.error( _("Unexpected {!r} tail data after {} element in record {}").format( element.tail, element.tag, j ) )
        if self._XMLtree.tail is not None and self._XMLtree.tail.strip(): logging.error( _("Unexpected {!r} tail data after {} element").format( self._XMLtree.tail, self._XMLtree.tag ) )
    # end of __validate

    def __str__( self ):
        """
        This method returns the string representation of a Bible book code.

        @return: the name of a Bible object formatted as a string
        @rtype: string
        """
        indent = 2
        result = "USFMMarkersConverter object"
        if self.titleString: result += ('\n' if result else '') + ' '*indent + _("Title: {}").format( self.titleString )
        if self.ProgVersion: result += ('\n' if result else '') + ' '*indent + _("Version: {}").format( self.ProgVersion )
        if self.dateString: result += ('\n' if result else '') + ' '*indent + _("Date: {}").format( self.dateString )
        if self._XMLtree is not None: result += ('\n' if result else '') + ' '*indent + _("Number of entries = {}").format( len(self._XMLtree) )
        return result
    # end of __str__

    def __len__( self ):
        """ Returns the number of SFM markers loaded. """
        return len( self._XMLtree )
    # end of __len__

    def importDataToPython( self ):
        """
        Pivots the loaded XML entries (the header is not included) into plain Python containers.

        The pivoted tables are cached in self.__DataDicts, so a repeat call simply returns
            the dictionary that was built the first time.
        (The element tree in self._XMLtree is still there if you'd rather use that directly.)

        Field values outside the expected sets are reported with logging.error
            but are stored regardless.

        Returns a dictionary containing rawMarkerDict, numberedMarkerList, combinedMarkerDict,
            conversionDict, backConversionDict, the newline and internal marker lists
            (plain, numbered, and combined), noteMarkersList, and deprecatedMarkersList.
        """
        def textOf( parent, tag ):
            """ Text of the named child element (AttributeError if parent has no such child). """
            return parent.find( tag ).text
        # end of textOf

        assert self._XMLtree
        if self.__DataDicts: # Already pivoted on an earlier call -- reuse that result
            return self.__DataDicts

        yesOrNo = ( 'Yes', 'No' )
        suffixDigits = '1234' # These are the suffix digits that we allow

        # Load and validate entries and create the dictionaries and lists
        # Note that the combined lists include the numbered markers, e.g., s as well as s1, s2, …
        rawMarkerDict = OrderedDict()
        numberedMarkerList = []
        combinedMarkerDict = {}
        conversionDict = {}
        backConversionDict = {}
        newlineMarkersList = []
        numberedNewlineMarkersList = []
        combinedNewlineMarkersList = []
        internalMarkersList = []
        numberedInternalMarkersList = []
        combinedInternalMarkersList = []
        noteMarkersList = []
        deprecatedMarkersList = []

        for entry in self._XMLtree:
            # Start with the compulsory elements
            nameEnglish = textOf( entry, 'nameEnglish' ) # This name is really just a comment element
            marker = textOf( entry, 'marker' )
            if marker != marker.lower():
                logging.error( _("Marker {!r} should be lower case").format( marker ) )

            compulsory = textOf( entry, 'compulsory' )
            if compulsory not in yesOrNo:
                logging.error( _("Unexpected {!r} compulsory field for marker {!r}").format( compulsory, marker ) )
            level = textOf( entry, 'level' )
            compulsoryFlag = compulsory == 'Yes'

            # File the marker under its level
            if level == 'Newline':
                newlineMarkersList.append( marker )
                combinedNewlineMarkersList.append( marker )
            elif level == 'Internal':
                internalMarkersList.append( marker )
            elif level == 'Note':
                noteMarkersList.append( marker )
            else:
                logging.error( _("Unexpected {!r} level field for marker {!r}").format( level, marker ) )

            numberable = textOf( entry, 'numberable' )
            if numberable not in yesOrNo:
                logging.error( _("Unexpected {!r} numberable field for marker {!r}").format( numberable, marker ) )
            numberableFlag = numberable == "Yes"
            if numberableFlag and level == "Character":
                logging.error( _("Unexpected {!r} numberable field for character marker {!r}").format( numberable, marker ) )

            nests = textOf( entry, "nests" )
            if nests not in yesOrNo:
                logging.error( _("Unexpected {!r} nests field for marker {!r}").format( nests, marker ) )
            nestsFlag = nests == 'Yes'

            hasContent = textOf( entry, 'hasContent' )
            if hasContent not in ( 'Always', 'Never', 'Sometimes' ):
                logging.error( _("Unexpected {!r} hasContent field for marker {!r}").format( hasContent, marker ) )

            printed = textOf( entry, 'printed' )
            if printed not in yesOrNo:
                logging.error( _("Unexpected {!r} printed field for marker {!r}").format( printed, marker ) )
            printedFlag = printed == 'Yes'

            closed = textOf( entry, 'closed' )
            if closed not in ( 'No', 'Always', 'Optional' ):
                logging.error( _("Unexpected {!r} closed field for marker {!r}").format( closed, marker ) )

            occursIn = textOf( entry, 'occursIn' )
            if occursIn not in ( 'Header', 'Introduction', 'Numbering', 'Text', 'Canonical Text', 'Poetry', 'Text, Poetry', 'Acrostic verse', 'Table row', 'Footnote', 'Cross-reference', 'Front and back matter' ):
                logging.error( _("Unexpected {!r} occursIn field for marker {!r}").format( occursIn, marker ) )

            deprecated = textOf( entry, 'deprecated' )
            if deprecated not in yesOrNo:
                logging.error( _("Unexpected {!r} deprecated field for marker {!r}").format( deprecated, marker ) )
            deprecatedFlag = deprecated == 'Yes'

            # The optional elements are set to None if they don't exist
            descriptionElement = entry.find( "description" )
            description = None if descriptionElement is None else descriptionElement.text
            if description is not None:
                assert description

            # Now put it into my dictionaries and lists for easy access
            #   The marker is lowercase by definition
            if "marker" in self._uniqueElements:
                assert marker not in rawMarkerDict # Shouldn't be any duplicates
            rawMarkerDict[marker] = {
                "compulsoryFlag":compulsoryFlag, "level":level, "numberableFlag":numberableFlag, "nestsFlag":nestsFlag,
                "hasContent":hasContent, "occursIn":occursIn, "printedFlag":printedFlag, "closed":closed, "deprecatedFlag":deprecatedFlag,
                "description":description, "nameEnglish":nameEnglish }
            combinedMarkerDict[marker] = marker

            # Anything that isn't a known newline marker (including note markers) is filed in the internal lists
            isNewlineMarker = marker in newlineMarkersList
            if numberableFlag: # We have some extra work to do
                conversionDict[marker] = marker + '1'
                for digit in suffixDigits:
                    numberedMarker = marker + digit
                    backConversionDict[numberedMarker] = marker
                    numberedMarkerList.append( numberedMarker )
                    combinedMarkerDict[numberedMarker] = marker
                    if isNewlineMarker:
                        numberedNewlineMarkersList.append( numberedMarker )
                        combinedNewlineMarkersList.append( numberedMarker )
                    else:
                        numberedInternalMarkersList.append( numberedMarker )
                        combinedInternalMarkersList.append( numberedMarker )
                    if deprecatedFlag:
                        deprecatedMarkersList.append( numberedMarker )
            else: # it's not numberable
                numberedMarkerList.append( marker )
                if isNewlineMarker:
                    numberedNewlineMarkersList.append( marker )
                else:
                    numberedInternalMarkersList.append( marker )
                if deprecatedFlag:
                    deprecatedMarkersList.append( marker )

        pivotedTables = {
            "rawMarkerDict":rawMarkerDict, "numberedMarkerList":numberedMarkerList, "combinedMarkerDict":combinedMarkerDict,
            "conversionDict":conversionDict, "backConversionDict":backConversionDict,
            "newlineMarkersList":newlineMarkersList, "numberedNewlineMarkersList":numberedNewlineMarkersList, "combinedNewlineMarkersList":combinedNewlineMarkersList,
            "internalMarkersList":internalMarkersList, "numberedInternalMarkersList":numberedInternalMarkersList, "combinedInternalMarkersList":combinedInternalMarkersList,
            "noteMarkersList":noteMarkersList, "deprecatedMarkersList":deprecatedMarkersList, }
        self.__DataDicts = pivotedTables
        return self.__DataDicts # Just delete any of the dictionaries that you don't need
    # end of importDataToPython

    def pickle( self, filepath=None ):
        """
        Writes the information tables to a .pickle file that can be easily loaded into a Python3 program.

        If no filepath is given, the file is named <filenameBase>_Tables.pickle and is put in a
            DerivedFiles folder beside the source XML file (the folder is created if necessary).

        The data must already have been loaded (asserted), and is pivoted by importDataToPython() first.
        """
        import pickle

        assert self._XMLtree
        self.importDataToPython()
        assert self.__DataDicts

        if not filepath:
            folder = os.path.join( os.path.split(self.__XMLFilepath)[0], "DerivedFiles/" )
            # exist_ok avoids the race between checking for the folder and creating it
            os.makedirs( folder, exist_ok=True )
            filepath = os.path.join( folder, self._filenameBase + "_Tables.pickle" )
        if BibleOrgSysGlobals.verbosityLevel > 1: print( _("Exporting to {}…").format( filepath ) )
        with open( filepath, 'wb' ) as myFile:
            pickle.dump( self.__DataDicts, myFile )
    # end of pickle

    def exportDataToPython( self, filepath=None ):
        """
        Writes the information tables to a .py file that can be cut and pasted into a Python program.

        If no filepath is given, the file is named <filenameBase>_Tables.py and is put in a
            DerivedFiles folder beside the source XML file (the folder is created if necessary).

        Each table is written as a plain assignment (name = {…}, name = OrderedDict([…]) or name = […])
            with no trailing comma, so that the pasted name is bound to the container itself
            and not to a one-element tuple holding it.
        Empty tables are written as empty containers with a field count of zero.
        """
        def countFields( sampleEntry ):
            """Returns the number of fields in sampleEntry (1 if it's not a tuple, dict, or list)."""
            return len( sampleEntry ) if isinstance( sampleEntry, (tuple,dict,list) ) else 1
        # end of countFields

        def exportPythonDict( theFile, theDict, dictName, keyComment, fieldsComment ):
            """Exports theDict to theFile as a dict literal with the keys in sorted order."""
            assert isinstance( theDict, dict )
            # We only check the first entry we get (an empty dictionary has nothing to check)
            fieldsCount = countFields( next( iter( theDict.values() ) ) ) if theDict else 0
            theFile.write( "{} = {{\n  # Key is {}\n  # Fields ({}) are: {}\n".format( dictName, keyComment, fieldsCount, fieldsComment ) )
            for dictKey in sorted(theDict.keys()):
                theFile.write( '  {}: {},\n'.format( repr(dictKey), repr(theDict[dictKey]) ) )
            theFile.write( "}}\n# end of {} ({} entries)\n\n".format( dictName, len(theDict) ) )
        # end of exportPythonDict

        def exportPythonOrderedDict( theFile, theDict, dictName, keyComment, fieldsComment ):
            """Exports theDict to theFile as an OrderedDict, keeping the existing key order."""
            assert isinstance( theDict, OrderedDict )
            # We only check the first entry we get (an empty dictionary has nothing to check)
            fieldsCount = countFields( next( iter( theDict.values() ) ) ) if theDict else 0
            theFile.write( '{} = OrderedDict([\n    # Key is {}\n    # Fields ({}) are: {}\n'.format( dictName, keyComment, fieldsCount, fieldsComment ) )
            for dictKey in theDict.keys():
                theFile.write( '  ({}, {}),\n'.format( repr(dictKey), repr(theDict[dictKey]) ) )
            # No comma after the closing bracket -- it would turn the assignment into a tuple
            theFile.write( "]) # end of {} ({} entries)\n\n".format( dictName, len(theDict) ) )
        # end of exportPythonOrderedDict

        def exportPythonList( theFile, theList, listName, dummy, fieldsComment ):
            """Exports theList to theFile (dummy only keeps the signature the same as the dict exporters)."""
            assert isinstance( theList, list )
            # Some lists can legitimately be empty so don't index blindly
            fieldsCount = countFields( theList[0] ) if theList else 0
            theFile.write( '{} = [\n    # Fields ({}) are: {}\n'.format( listName, fieldsCount, fieldsComment ) )
            for j,entry in enumerate(theList):
                theFile.write( '  {}, # {}\n'.format( repr(entry), j ) )
            # No comma after the closing bracket -- it would turn the assignment into a tuple
            theFile.write( "] # end of {} ({} entries)\n\n".format( listName, len(theList) ) )
        # end of exportPythonList

        assert self._XMLtree
        self.importDataToPython()
        assert self.__DataDicts

        if not filepath:
            folder = os.path.join( os.path.split(self.__XMLFilepath)[0], "DerivedFiles" )
            os.makedirs( folder, exist_ok=True ) # The default folder might not exist yet
            filepath = os.path.join( folder, self._filenameBase + "_Tables.py" )
        if BibleOrgSysGlobals.verbosityLevel > 1: print( _("Exporting to {}…").format( filepath ) )
        with open( filepath, 'wt', encoding='utf-8' ) as myFile:
            myFile.write( "# {}\n#\n".format( filepath ) )
            myFile.write( "# This UTF-8 file was automatically generated by USFMMarkers.py V{} on {}\n#\n".format( ProgVersion, datetime.now() ) )
            if self.titleString: myFile.write( "# {} data\n".format( self.titleString ) )
            if self.ProgVersion: myFile.write( "#  Version: {}\n".format( self.ProgVersion ) )
            if self.dateString: myFile.write( "#  Date: {}\n#\n".format( self.dateString ) )
            myFile.write( "#   {} {} loaded from the original XML file.\n#\n\n".format( len(self._XMLtree), self._treeTag ) )
            myFile.write( "from collections import OrderedDict\n\n" )
            # For each table: the function that exports it, plus the key and fields descriptions for its comment lines
            dictInfo = { "rawMarkerDict":(exportPythonOrderedDict, "rawMarker (in the original XML order)","specified"),
                            "numberedMarkerList":(exportPythonList, "marker","rawMarker"),
                            "combinedMarkerDict":(exportPythonDict, "marker","rawMarker"),
                            "conversionDict":(exportPythonDict, "rawMarker","numberedMarker"),
                            "backConversionDict":(exportPythonDict, "numberedMarker","rawMarker"),
                            "newlineMarkersList":(exportPythonList, "","rawMarker"),
                            "numberedNewlineMarkersList":(exportPythonList, "","rawMarker"),
                            "combinedNewlineMarkersList":(exportPythonList, "","rawMarker"),
                            "internalMarkersList":(exportPythonList, "","rawMarker"),
                            "numberedInternalMarkersList":(exportPythonList, "","rawMarker"),
                            "combinedInternalMarkersList":(exportPythonList, "","rawMarker"),
                            "noteMarkersList":(exportPythonList, "","rawMarker"),
                            "deprecatedMarkersList":(exportPythonList, "","rawMarker") }
            for dictName,dictData in self.__DataDicts.items():
                exportFunction, keyComment, fieldsComment = dictInfo[dictName]
                exportFunction( myFile, dictData, dictName, keyComment, fieldsComment )
            myFile.write( "# end of {}".format( os.path.basename(filepath) ) )
    # end of exportDataToPython

    def exportDataToJSON( self, filepath=None ):
        """
        Dumps the pivoted information tables into a UTF-8 .json file (indented by two spaces)
            so that they can be loaded by programs written in other languages.

        If no filepath is given, the file is named <filenameBase>_Tables.json
            and is put in the DerivedFiles folder beside the source XML file.

        See http://en.wikipedia.org/wiki/JSON.
        """
        import json

        assert self._XMLtree
        self.importDataToPython()
        assert self.__DataDicts

        if not filepath:
            sourceFolder = os.path.split( self.__XMLFilepath )[0]
            jsonFilename = self._filenameBase + "_Tables.json"
            filepath = os.path.join( sourceFolder, "DerivedFiles", jsonFilename )
        if BibleOrgSysGlobals.verbosityLevel > 1:
            print( _("Exporting to {}…").format( filepath ) )
        with open( filepath, 'wt', encoding='utf-8' ) as jsonFile:
            json.dump( self.__DataDicts, jsonFile, indent=2 )
    # end of exportDataToJSON

    def exportDataToC( self, filepath=None ):
        """
        Writes the information tables to a .h and .c files that can be included in c and c++ programs.

        NOTE: The (optional) filepath should not have the file extension specified -- this is added automatically.

        NOT IMPLEMENTED YET: after checking that the data is loaded and pivoted,
            this always raises NotImplementedError (which is a subclass of Exception).
        Everything after the raise statement is an unfinished draft that never runs.
        """
        def exportPythonDict( hFile, cFile, theDict, dictName, sortedBy, structure ):
            """ Exports theDict to the .h and .c files. """
            def convertEntry( entry ):
                """ Convert special characters in an entry… """
                result = ""
                if isinstance( entry, tuple ):
                    for field in entry:
                        if result: result += ", " # Separate the fields
                        if field is None: result += '""'
                        elif isinstance( field, str): result += '"' + str(field).replace('"','\\"') + '"'
                        elif isinstance( field, int): result += str(field)
                        else: logging.error( _("Cannot convert unknown field type {!r} in entry {!r}").format( field, entry ) )
                elif isinstance( entry, dict ):
                    for key in sorted(entry.keys()):
                        field = entry[key]
                        if result: result += ", " # Separate the fields
                        if field is None: result += '""'
                        elif isinstance( field, str): result += '"' + str(field).replace('"','\\"') + '"'
                        elif isinstance( field, int): result += str(field)
                        else: logging.error( _("Cannot convert unknown field type {!r} in entry {!r}").format( field, entry ) )
                else:
                    logging.error( _("Can't handle this type of entry yet: {}").format( repr(entry) ) )
                return result
            # end of convertEntry

            for dictKey in theDict.keys(): # Have to iterate this :(
                fieldsCount = len( theDict[dictKey] ) + 1 # Add one since we include the key in the count
                break # We only check the first (random) entry we get

            #hFile.write( "typedef struct {}EntryStruct { {} } {}Entry;\n\n".format( dictName, structure, dictName ) )
            hFile.write( "typedef struct {}EntryStruct {{\n".format( dictName ) )
            for declaration in structure.split(';'):
                adjDeclaration = declaration.strip()
                if adjDeclaration: hFile.write( "    {};\n".format( adjDeclaration ) )
            hFile.write( "}} {}Entry;\n\n".format( dictName ) )

            cFile.write( "const static {}Entry\n {}[{}] = {{\n  // Fields ({}) are {}\n  // Sorted by {}\n".format( dictName, dictName, len(theDict), fieldsCount, structure, sortedBy ) )
            for dictKey in sorted(theDict.keys()):
                if isinstance( dictKey, str ):
                    cFile.write( "  {{\"{}\", {}}},\n".format( dictKey, convertEntry(theDict[dictKey]) ) )
                elif isinstance( dictKey, int ):
                    cFile.write( "  {{{}, {}}},\n".format( dictKey, convertEntry(theDict[dictKey]) ) )
                else:
                    logging.error( _("Can't handle this type of key data yet: {}").format( dictKey ) )
            cFile.write( "]}}; // {} ({} entries)\n\n".format( dictName, len(theDict) ) )
        # end of exportPythonDict

        assert self._XMLtree
        self.importDataToPython()
        assert self.__DataDicts

        # NotImplementedError says what's actually wrong (and is still caught by "except Exception")
        raise NotImplementedError( "C export not written yet, sorry." )

        # ---- UNREACHABLE DRAFT from here to the end of the function ----
        # NOTE(review): The only dictInfo key below that matches a table built by this class's importDataToPython
        #   is "rawMarkerDict", and the lists in self.__DataDicts aren't handled at all,
        #   so this needs reworking before the raise above can be removed -- TODO confirm the intended structures.
        if not filepath: filepath = os.path.join( os.path.split(self.__XMLFilepath)[0], "DerivedFiles", self._filenameBase + "_Tables" )
        hFilepath = filepath + '.h'
        cFilepath = filepath + '.c'
        if BibleOrgSysGlobals.verbosityLevel > 1: print( _("Exporting to {}…").format( cFilepath ) ) # Don't bother telling them about the .h file
        ifdefName = self._filenameBase.upper() + "_Tables_h"

        with open( hFilepath, 'wt', encoding='utf-8' ) as myHFile, \
             open( cFilepath, 'wt', encoding='utf-8' ) as myCFile:
            myHFile.write( "// {}\n//\n".format( hFilepath ) )
            myCFile.write( "// {}\n//\n".format( cFilepath ) )
            lines = "// This UTF-8 file was automatically generated by USFMMarkers.py V{} on {}\n//\n".format( ProgVersion, datetime.now() )
            myHFile.write( lines ); myCFile.write( lines )
            if self.titleString:
                lines = "// {} data\n".format( self.titleString )
                myHFile.write( lines ); myCFile.write( lines )
            if self.ProgVersion:
                lines = "//  Version: {}\n".format( self.ProgVersion )
                myHFile.write( lines ); myCFile.write( lines )
            if self.dateString:
                lines = "//  Date: {}\n//\n".format( self.dateString )
                myHFile.write( lines ); myCFile.write( lines )
            myCFile.write( "//   {} {} loaded from the original XML file.\n//\n\n".format( len(self._XMLtree), self._treeTag ) )
            myHFile.write( "\n#ifndef {}\n#define {}\n\n".format( ifdefName, ifdefName ) )
            myCFile.write( '#include "{}"\n\n'.format( os.path.basename(hFilepath) ) )

            CHAR = "const unsigned char"
            BYTE = "const int"
            dictInfo = {
                "referenceNumberDict":("referenceNumber (integer 1..255)",
                    "{} referenceNumber; {}* ByzantineAbbreviation; {}* CCELNumberString; {}* NETBibleAbbreviation; {}* OSISAbbreviation; {} ParatextAbbreviation[3+1]; {} ParatextNumberString[2+1]; {}* SBLAbbreviation; {}* SwordAbbreviation; {}* nameEnglish; {}* numExpectedChapters; {}* possibleAlternativeBooks; {} marker[3+1];"
                   .format(BYTE, CHAR, CHAR, CHAR, CHAR, CHAR, CHAR, CHAR, CHAR, CHAR, CHAR, CHAR, CHAR ) ),
                "rawMarkerDict":("marker",
                    "{} marker[3+1]; {}* ByzantineAbbreviation; {}* CCELNumberString; {} referenceNumber; {}* NETBibleAbbreviation; {}* OSISAbbreviation; {} ParatextAbbreviation[3+1]; {} ParatextNumberString[2+1]; {}* SBLAbbreviation; {}* SwordAbbreviation; {}* nameEnglish; {}* numExpectedChapters; {}* possibleAlternativeBooks;"
                   .format(CHAR, CHAR, CHAR, BYTE, CHAR, CHAR, CHAR, CHAR, CHAR, CHAR, CHAR, CHAR, CHAR ) ),
                "CCELDict":("CCELNumberString", "{}* CCELNumberString; {} referenceNumber; {} marker[3+1];".format(CHAR,BYTE,CHAR) ),
                "SBLDict":("SBLAbbreviation", "{}* SBLAbbreviation; {} referenceNumber; {} marker[3+1];".format(CHAR,BYTE,CHAR) ),
                "EnglishNameDict":("nameEnglish", "{}* nameEnglish; {} referenceNumber; {} marker[3+1];".format(CHAR,BYTE,CHAR) ) }

            for dictName,dictData in self.__DataDicts.items():
                exportPythonDict( myHFile, myCFile, dictData, dictName, dictInfo[dictName][0], dictInfo[dictName][1] )

            myHFile.write( "#endif // {}\n\n".format( ifdefName ) )
            myHFile.write( "// end of {}".format( os.path.basename(hFilepath) ) )
            myCFile.write( "// end of {}".format( os.path.basename(cFilepath) ) )

Exemplo n.º 12

0

Exibir arquivo

class BibleReferencesLinksConverter:
    """
    Class for reading, validating, and converting BibleReferencesLinks.
    This is only intended as a transitory class (used at start-up).
    The BibleReferencesLinks class has functions more generally useful.
    """

    def __init__( self ): # We can't give this parameters because of the singleton
        """
        Constructor: expects the filepath of the source XML file.
        Loads (and crudely validates the XML file) into an element tree.
        """
        self._filenameBase = 'BibleReferencesLinks'

        # These fields are used for parsing the XML
        self._treeTag = 'BibleReferencesLinks'
        self._headerTag = 'header'
        self._mainElementTag = 'BibleReferenceLinks'

        # These fields are used for automatically checking/validating the XML
        self._compulsoryAttributes = ()
        self._optionalAttributes = ()
        self._uniqueAttributes = self._compulsoryAttributes + self._optionalAttributes
        self._compulsoryElements = ( 'sourceReference', 'sourceComponent', 'BibleReferenceLink', )
        self._optionalElements = (  )
        self._uniqueElements = ( 'sourceReference' )

        # These are fields that we will fill later
        self._XMLheader, self._XMLtree = None, None
        self.__DataList = {} # Used for import
        self.titleString = self.ProgVersion = self.dateString = ''
    # end of BibleReferencesLinksConverter.__init__


    def loadAndValidate( self, XMLFilepath=None ):
        """
        Loads (and crudely validates the XML file) into an element tree.
            Allows the filepath of the source XML file to be specified, otherwise uses the default.
        """
        if self._XMLtree is None: # We mustn't have already have loaded the data
            if XMLFilepath is None:
                XMLFilepath = os.path.join( os.path.dirname(__file__), "DataFiles", self._filenameBase + ".xml" ) # Relative to module, not cwd
            self.__load( XMLFilepath )
            if BibleOrgSysGlobals.strictCheckingFlag:
                self.__validate()
        else: # The data must have been already loaded
            if XMLFilepath is not None and XMLFilepath!=self.__XMLFilepath: logging.error( _("Bible references links are already loaded -- your different filepath of {!r} was ignored").format( XMLFilepath ) )
        return self
    # end of BibleReferencesLinksConverter.loadAndValidate


    def __load( self, XMLFilepath ):
        """
        Load the source XML file and remove the header from the tree.
        Also, extracts some useful elements from the header element.

        The filepath is remembered in self.__XMLFilepath and the parsed root element is put in self._XMLtree.
        If the first child of the root has the header tag, it's removed from the tree,
            saved in self._XMLheader (and in self.XMLheader for any existing users of that name),
            and the version, date, and title of its work element are saved in
            self.ProgVersion, self.dateString, and self.titleString.

        Problems with the structure are logged rather than raised,
            although asserts check the filepath and that something was actually loaded.
        """
        assert XMLFilepath
        self.__XMLFilepath = XMLFilepath
        assert self._XMLtree is None or len(self._XMLtree)==0 # Make sure we're not doing this twice

        if BibleOrgSysGlobals.verbosityLevel > 2: print( _("Loading BibleReferencesLinks XML file from {!r}…").format( self.__XMLFilepath ) )
        self._XMLtree = ElementTree().parse( self.__XMLFilepath )
        # Fail here if we didn't load anything at all
        #   (an explicit length test because the truth value of an Element is deprecated)
        assert self._XMLtree is not None and len(self._XMLtree) > 0

        if self._XMLtree.tag == self._treeTag:
            header = self._XMLtree[0]
            if header.tag == self._headerTag:
                self._XMLheader = self.XMLheader = header
                self._XMLtree.remove( header )
                BibleOrgSysGlobals.checkXMLNoText( header, "header" )
                BibleOrgSysGlobals.checkXMLNoTail( header, "header" )
                BibleOrgSysGlobals.checkXMLNoAttributes( header, "header" )
                if len(header)>1:
                    logging.info( _("Unexpected elements in header") )
                elif len(header)==0:
                    logging.info( _("Missing work element in header") )
                else:
                    work = header[0]
                    BibleOrgSysGlobals.checkXMLNoText( work, "work in header" )
                    BibleOrgSysGlobals.checkXMLNoTail( work, "work in header" )
                    BibleOrgSysGlobals.checkXMLNoAttributes( work, "work in header" )
                    if work.tag == "work":
                        # A missing field is logged (and its attribute left as it was) rather than crashing the load
                        for attributeName,fieldTag in ( ('ProgVersion','version'), ('dateString','date'), ('titleString','title') ):
                            fieldElement = work.find( fieldTag )
                            if fieldElement is None:
                                logging.warning( _("Missing {!r} element in header work").format( fieldTag ) )
                            else:
                                setattr( self, attributeName, fieldElement.text )
                    else:
                        logging.warning( _("Missing work element in header") )
            else:
                # Translate first and then fill in the tag (otherwise the translation lookup can never match)
                logging.warning( _("Missing header element (looking for {!r} tag)").format( self._headerTag ) )
            if header.tail is not None and header.tail.strip(): logging.error( _("Unexpected {!r} tail data after header").format( header.tail ) )
        else:
            logging.error( _("Expected to load {!r} but got {!r}").format( self._treeTag, self._XMLtree.tag ) )
    # end of BibleReferencesLinksConverter.__load


    def __validate( self ):
        """
        Check/validate the loaded data.

        Every child of the loaded tree is expected to be a main element
            (anything else gets a warning).
        Each main element is checked for:
            * compulsory, optional, and unexpected attributes
            * compulsory, optional, and unexpected subelements
            * repeated values in the attributes/subelements that must be unique
            * stray tail text

        All problems are logged (nothing is raised, other than the assert that data has been loaded).
        """
        # (An explicit length test because the truth value of an Element is deprecated)
        assert self._XMLtree is not None and len(self._XMLtree) > 0

        # A one-item "tuple" written without its trailing comma is really a plain string,
        #   and iterating over that would give single characters instead of names
        uniqueElements = (self._uniqueElements,) if isinstance( self._uniqueElements, str ) else self._uniqueElements
        uniqueAttributes = (self._uniqueAttributes,) if isinstance( self._uniqueAttributes, str ) else self._uniqueAttributes

        # Sets give us fast "have we seen this value before" checks
        uniqueDict = {}
        for elementName in uniqueElements: uniqueDict["Element_"+elementName] = set()
        for attributeName in uniqueAttributes: uniqueDict["Attribute_"+attributeName] = set()

        for j,element in enumerate(self._XMLtree):
            if element.tag == self._mainElementTag:
                BibleOrgSysGlobals.checkXMLNoText( element, element.tag )
                BibleOrgSysGlobals.checkXMLNoTail( element, element.tag )
                if not self._compulsoryAttributes and not self._optionalAttributes: BibleOrgSysGlobals.checkXMLNoAttributes( element, element.tag )
                if not self._compulsoryElements and not self._optionalElements: BibleOrgSysGlobals.checkXMLNoSubelements( element, element.tag )

                # Check compulsory attributes on this main element
                for attributeName in self._compulsoryAttributes:
                    attributeValue = element.get( attributeName )
                    if attributeValue is None:
                        logging.error( _("Compulsory {!r} attribute is missing from {} element in record {}").format( attributeName, element.tag, j ) )
                    elif not attributeValue: # Only call it blank if it's actually there
                        logging.warning( _("Compulsory {!r} attribute is blank on {} element in record {}").format( attributeName, element.tag, j ) )

                # Check optional attributes on this main element
                for attributeName in self._optionalAttributes:
                    attributeValue = element.get( attributeName )
                    if attributeValue is not None:
                        if not attributeValue:
                            logging.warning( _("Optional {!r} attribute is blank on {} element in record {}").format( attributeName, element.tag, j ) )

                # Check for unexpected additional attributes on this main element
                for attributeName in element.keys():
                    attributeValue = element.get( attributeName )
                    if attributeName not in self._compulsoryAttributes and attributeName not in self._optionalAttributes:
                        logging.warning( _("Additional {!r} attribute ({!r}) found on {} element in record {}").format( attributeName, attributeValue, element.tag, j ) )

                # Check the attributes that must contain unique information (in that particular field -- doesn't check across different attributes)
                for attributeName in uniqueAttributes:
                    attributeValue = element.get( attributeName )
                    if attributeValue is not None:
                        if attributeValue in uniqueDict["Attribute_"+attributeName]:
                            logging.error( _("Found {!r} data repeated in {!r} field on {} element in record {}").format( attributeValue, attributeName, element.tag, j ) )
                        uniqueDict["Attribute_"+attributeName].add( attributeValue )

                # Get the sourceComponent to use as a record ID
                #   (If it's missing, the ID is None and the compulsory elements check below reports it)
                sourceComponentElement = element.find( "sourceComponent" )
                ID = None if sourceComponentElement is None else sourceComponentElement.text

                # Check compulsory elements
                for elementName in self._compulsoryElements:
                    foundElement = element.find( elementName )
                    if foundElement is None:
                        logging.error( _("Compulsory {!r} element is missing in record with ID {!r} (record {})").format( elementName, ID, j ) )
                    else:
                        BibleOrgSysGlobals.checkXMLNoTail( foundElement, foundElement.tag + " in " + element.tag )
                        BibleOrgSysGlobals.checkXMLNoAttributes( foundElement, foundElement.tag + " in " + element.tag )
                        #BibleOrgSysGlobals.checkXMLNoSubelements( foundElement, foundElement.tag + " in " + element.tag )
                        if not foundElement.text:
                            logging.warning( _("Compulsory {!r} element is blank in record with ID {!r} (record {})").format( elementName, ID, j ) )

                # Check optional elements
                for elementName in self._optionalElements:
                    foundElement = element.find( elementName )
                    if foundElement is not None:
                        BibleOrgSysGlobals.checkXMLNoTail( foundElement, foundElement.tag + " in " + element.tag )
                        BibleOrgSysGlobals.checkXMLNoAttributes( foundElement, foundElement.tag + " in " + element.tag )
                        BibleOrgSysGlobals.checkXMLNoSubelements( foundElement, foundElement.tag + " in " + element.tag )
                        if not foundElement.text:
                            logging.warning( _("Optional {!r} element is blank in record with ID {!r} (record {})").format( elementName, ID, j ) )

                # Check for unexpected additional elements
                for subelement in element:
                    if subelement.tag not in self._compulsoryElements and subelement.tag not in self._optionalElements:
                        logging.warning( _("Additional {!r} element ({!r}) found in record with ID {!r} (record {})").format( subelement.tag, subelement.text, ID, j ) )

                # Check the elements that must contain unique information (in that particular element -- doesn't check across different elements)
                for elementName in uniqueElements:
                    foundElement = element.find( elementName )
                    if foundElement is not None:
                        text = foundElement.text
                        if text in uniqueDict["Element_"+elementName]:
                            logging.error( _("Found {!r} data repeated in {!r} element in record with ID {!r} (record {})").format( text, elementName, ID, j ) )
                        uniqueDict["Element_"+elementName].add( text )
            else:
                logging.warning( _("Unexpected element: {} in record {}").format( element.tag, j ) )
            if element.tail is not None and element.tail.strip(): logging.error( _("Unexpected {!r} tail data after {} element in record {}").format( element.tail, element.tag, j ) )
        if self._XMLtree.tail is not None and self._XMLtree.tail.strip(): logging.error( _("Unexpected {!r} tail data after {} element").format( self._XMLtree.tail, self._XMLtree.tag ) )
    # end of BibleReferencesLinksConverter.__validate


    def __str__( self ):
        """
        This method returns the string representation of a Bible book code.

        @return: the name of a Bible object formatted as a string
        @rtype: string
        """
        indent = 2
        result = "BibleReferencesLinksConverter object"
        if self.titleString: result += ('\n' if result else '') + ' '*indent + _("Title: {}").format( self.titleString )
        if self.ProgVersion: result += ('\n' if result else '') + ' '*indent + _("Version: {}").format( self.ProgVersion )
        if self.dateString: result += ('\n' if result else '') + ' '*indent + _("Date: {}").format( self.dateString )
        if self._XMLtree is not None: result += ('\n' if result else '') + ' '*indent + _("Number of entries = {}").format( len(self._XMLtree) )
        return result
    # end of BibleReferencesLinksConverter.__str__


    def __len__( self ):
        """
        Returns the number of references links loaded.
        """
        return len( self._XMLtree )
    # end of BibleReferencesLinksConverter.__len__


    def importDataToPython( self ):
        """
        Loads (and pivots) the data (not including the header) into suitable Python containers to use in a Python program.
        (Of course, you can just use the elementTree in self._XMLtree if you prefer.)
        """
        def makeList( parameter1, parameter2 ):
            """
            Returns a list containing all parameters. Parameter1 may already be a list.
            """
            if isinstance( parameter1, list ):
                #assert parameter2 not in parameter1
                parameter1.append( parameter2 )
                return parameter1
            else:
                return [ parameter1, parameter2 ]
        # end of makeList


        assert self._XMLtree
        if self.__DataList: # We've already done an import/restructuring -- no need to repeat it
            return self.__DataList, self.__DataDict

        # We'll create a number of dictionaries with different elements as the key
        rawRefLinkList = []
        actualLinkCount = 0
        for element in self._XMLtree:
            #print( BibleOrgSysGlobals.elementStr( element ) )

            # Get these first for helpful error messages
            sourceReference = element.find('sourceReference').text
            sourceComponent = element.find('sourceComponent').text
            assert sourceComponent in ('Section','Verses','Verse',)

            BibleOrgSysGlobals.checkXMLNoText( element, sourceReference, 'kls1' )
            BibleOrgSysGlobals.checkXMLNoAttributes( element, sourceReference, 'kd21' )
            BibleOrgSysGlobals.checkXMLNoTail( element, sourceReference, 'so20' )

            actualRawLinksList = []
            for subelement in element:
                #print( BibleOrgSysGlobals.elementStr( subelement ) )
                if subelement.tag in ( 'sourceReference','sourceComponent',): # already processed these
                    BibleOrgSysGlobals.checkXMLNoAttributes( subelement, sourceReference, 'ls12' )
                    BibleOrgSysGlobals.checkXMLNoSubelements( subelement, sourceReference, 'ks02' )
                    BibleOrgSysGlobals.checkXMLNoTail( subelement, sourceReference, 'sqw1' )

                elif subelement.tag == 'BibleReferenceLink':
                    BibleOrgSysGlobals.checkXMLNoText( subelement, sourceReference, 'haw9' )
                    BibleOrgSysGlobals.checkXMLNoAttributes( subelement, sourceReference, 'hs19' )
                    BibleOrgSysGlobals.checkXMLNoTail( subelement, sourceReference, 'jsd9' )

                    targetReference = subelement.find('targetReference').text
                    targetComponent = subelement.find('targetComponent').text
                    assert targetComponent in ('Section','Verses','Verse',)
                    linkType = subelement.find('linkType').text
                    assert linkType in ('TSK','QuotedOTReference','AlludedOTReference','PossibleOTReference',)

                    actualRawLinksList.append( (targetReference,targetComponent,linkType,) )
                    actualLinkCount += 1

            rawRefLinkList.append( (sourceReference,sourceComponent,actualRawLinksList,) )

        if BibleOrgSysGlobals.verbosityLevel > 1:
            print( "  {} raw links loaded (with {} actual raw link entries)".format( len(rawRefLinkList), actualLinkCount ) )


        myRefLinkList = []
        actualLinkCount = 0
        BOS = BibleOrganizationalSystem( 'GENERIC-KJV-66-ENG' )

        for j,(sourceReference,sourceComponent,actualRawLinksList) in enumerate( rawRefLinkList ):
            # Just do some testing first
            if sourceComponent == 'Verse':
                x = SimpleVerseKey( sourceReference )
            else:
                flag = False
                try:
                    x = SimpleVerseKey( sourceReference, ignoreParseErrors=True )
                    flag = True
                except TypeError: pass # This should happen coz it should fail the SVK
                if flag:
                    logging.error( "{} {!r} failed!".format( sourceComponent, sourceReference ) )
                    raise TypeError
            # Now do the actual parsing
            parsedSourceReference = FlexibleVersesKey( sourceReference )
            if BibleOrgSysGlobals.debugFlag and debuggingThisModule:
                print( j, sourceComponent, sourceReference, parsedSourceReference )
                #assert parsedSourceReference.getShortText().replace(' ','_') == sourceReference
            actualLinksList = []
            for k,(targetReference,targetComponent,linkType) in enumerate( actualRawLinksList ):
                # Just do some testing first
                if targetComponent == 'Verse':
                    x = SimpleVerseKey( targetReference )
                else:
                    flag = False
                    try:
                        x = SimpleVerseKey( targetReference, ignoreParseErrors=True )
                        flag = True
                    except TypeError: pass # This should happen coz it should fail the SVK
                    if flag:
                        logging.error( "{} {!r} failed!".format( targetComponent, targetReference ) )
                        raise TypeError
                # Now do the actual parsing
                try: parsedTargetReference = FlexibleVersesKey( targetReference )
                except TypeError:
                    print( "  Temporarily ignored {!r} (TypeError from FlexibleVersesKey)".format( targetReference ) )
                    parsedTargetReference = None
                if BibleOrgSysGlobals.debugFlag and debuggingThisModule:
                    print( ' ', targetComponent, targetReference, parsedTargetReference )
                    #assert parsedTargetReference.getShortText().replace(' ','_',1) == targetReference

                actualLinksList.append( (targetReference,targetComponent,parsedTargetReference,linkType,) )
                actualLinkCount += 1

            myRefLinkList.append( (sourceReference,sourceComponent,parsedSourceReference,actualLinksList,) )

        if BibleOrgSysGlobals.verbosityLevel > 1:
            print( "  {} links processed (with {} actual link entries)".format( len(rawRefLinkList), actualLinkCount ) )
        #print( myRefLinkList ); halt
        self.__DataList = myRefLinkList

        # Now put it into my dictionaries for easy access
        # This part should be customized or added to for however you need to process the data

        # Create a link dictionary (by verse key)
        myRefLinkDict = {}
        for sourceReference,sourceComponent,parsedSourceReference,actualLinksList in myRefLinkList:
            #print( sourceReference, sourceComponent, parsedSourceReference )
            #print( sourceReference, sourceComponent, parsedSourceReference, actualLinksList )
            for verseRef in parsedSourceReference.getIncludedVerses():
                #print( verseRef )
                assert isinstance( verseRef, SimpleVerseKey )
                if verseRef not in myRefLinkDict: myRefLinkDict[verseRef] = []
                myRefLinkDict[verseRef].append( (sourceReference,sourceComponent,parsedSourceReference,actualLinksList,) )
            #print( myRefLinkDict ); halt
        originalLinks = len( myRefLinkDict )
        print( "  {} verse links added to dictionary (includes filling out spans)".format( originalLinks ) )
        #print( myRefLinkDict ); halt

        # Create a reversed link dictionary (by verse key)
        for sourceReference,sourceComponent,parsedSourceReference,actualLinksList in myRefLinkList:
            #print( sourceReference, sourceComponent, parsedSourceReference )
            #print( sourceReference, sourceComponent, parsedSourceReference, actualLinksList )
            for targetReference,targetComponent,parsedTargetReference,linkType in actualLinksList:
                if parsedTargetReference is not None:
                    for verseRef in parsedTargetReference.getIncludedVerses():
                        #print( verseRef )
                        assert isinstance( verseRef, SimpleVerseKey )
                        if linkType == 'TSK': reverseLinkType = 'TSKQuoted'
                        elif linkType == 'QuotedOTReference': reverseLinkType = 'OTReferenceQuoted'
                        elif linkType == 'AlludedOTReference': reverseLinkType = 'OTReferenceAlluded'
                        elif linkType == 'PossibleOTReference': reverseLinkType = 'OTReferencePossible'
                        else: halt # Have a new linkType!
                        if verseRef not in myRefLinkDict: myRefLinkDict[verseRef] = []
                        myRefLinkDict[verseRef].append( (targetReference,targetComponent,parsedTargetReference,[(sourceReference,sourceComponent,parsedSourceReference,reverseLinkType)]) )
            #print( myRefLinkDict ); halt
        totalLinks = len( myRefLinkDict )
        reverseLinks = totalLinks - originalLinks
        print( "  {} reverse links added to dictionary to give {} total".format( reverseLinks, totalLinks ) )
        #print( myRefLinkDict ); halt

        self.__DataDict = myRefLinkDict

        # Let's find the most number of references for a verse
        mostReferences = totalReferences = 0
        for verseRef, entryList in self.__DataDict.items():
            numRefs = len( entryList )
            if numRefs > mostReferences: mostReferences, mostVerseRef = numRefs, verseRef
            totalReferences += numRefs
        print( "  {} maximum links for any one reference ({})".format( mostReferences, mostVerseRef.getShortText() ) )
        print( "  {} total links for all references".format( totalReferences ) )

        return self.__DataList, self.__DataDict
    # end of BibleReferencesLinksConverter.importDataToPython


    def pickle( self, filepath=None ):
        """
        Saves the imported data list and then the data dictionary (two consecutive pickle dumps)
            into one binary .pickle file which a Python3 program can load again.

        If no filepath is given, the file is named after self._filenameBase
            and goes in a DerivedFiles/ folder beside the XML file (the folder is created if necessary).
        """
        import pickle

        assert self._XMLtree
        self.importDataToPython() # Makes sure that the two containers below are filled
        assert self.__DataList
        assert self.__DataDict

        if not filepath:
            XMLFolder = os.path.split( self.__XMLFilepath )[0]
            folder = os.path.join( XMLFolder, "DerivedFiles/" )
            if not os.path.exists( folder ):
                os.mkdir( folder )
            outputFilename = self._filenameBase + "_Tables.pickle"
            filepath = os.path.join( folder, outputFilename )
        if BibleOrgSysGlobals.verbosityLevel > 1:
            print( _("Exporting to {}…").format( filepath ) )
        with open( filepath, 'wb' ) as pickleFile:
            for container in ( self.__DataList, self.__DataDict ): # The list must be dumped first
                pickle.dump( container, pickleFile )
    # end of BibleReferencesLinksConverter.pickle


    def exportDataWithIndex( self, filepath=None ):
        """
        Writes the links dictionary to a pickled data file, plus a pickled index file for locating entries in it.

        The data file contains the pickled entry list for each verse key, one straight after the other.
        The index file contains a pickled dictionary which maps each verse key
            to a (filePosition, length) tuple locating that key's entry list in the data file.

        NOTE: The (optional) filepath should not have the file extension specified
            -- ".data.pickle" and ".index.pickle" are added automatically.
            If no filepath is given, the files are named after self._filenameBase
            and go in a DerivedFiles/ folder beside the XML file (the folder is created if necessary).
        """
        import pickle

        # Explicit None/length test (rather than a truth test which is deprecated for ElementTree elements)
        assert self._XMLtree is not None and len( self._XMLtree )
        self.importDataToPython()
        assert self.__DataList
        assert self.__DataDict

        if not filepath:
            folder = os.path.join( os.path.split(self.__XMLFilepath)[0], "DerivedFiles/" )
            os.makedirs( folder, exist_ok=True )
            filepath = os.path.join( folder, self._filenameBase + "_Tables" )
        # Both names are derived here so that they also exist when the caller supplies the filepath
        #   (they used to be set only in the default case above, so a supplied filepath always failed)
        indexFilepath = filepath + ".index.pickle"
        dataFilepath = filepath + ".data.pickle"
        if BibleOrgSysGlobals.verbosityLevel > 1: print( _("Exporting to {}…").format( dataFilepath ) )

        index = {}
        filePosition = 0
        with open( dataFilepath, 'wb' ) as myFile:
            for vKey,refList in self.__DataDict.items():
                #print( "vKey", vKey, vKey.getVerseKeyText() )
                #print( " ", refList )
                length = myFile.write( pickle.dumps( refList ) ) # write() gives us the number of bytes written
                #print( " ", filePosition, length )
                index[vKey] = ( filePosition, length )
                filePosition += length
        with open( indexFilepath, 'wb' ) as myFile:
            pickle.dump( index, myFile )
    # end of BibleReferencesLinksConverter.exportDataWithIndex


    def exportDataToPython( self, filepath=None ):
        """
        Writes the information tables to a .py file that can be cut and pasted into a Python program.
        """
        def exportPythonDictOrList( theFile, theDictOrList, dictName, keyComment, fieldsComment ):
            """Exports theDictOrList to theFile."""
            assert theDictOrList
            raise Exception( "Not written yet" )
            for dictKey in theDict.keys(): # Have to iterate this :(
                fieldsCount = len( theDict[dictKey] )
                break # We only check the first (random) entry we get
            theFile.write( "{} = {{\n  # Key is {}\n  # Fields ({}) are: {}\n".format( dictName, keyComment, fieldsCount, fieldsComment ) )
            for dictKey in sorted(theDict.keys()):
                theFile.write( '  {}: {},\n'.format( repr(dictKey), repr(theDict[dictKey]) ) )
            theFile.write( "}}\n# end of {} ({} entries)\n\n".format( dictName, len(theDict) ) )
        # end of exportPythonDictOrList


        assert self._XMLtree
        self.importDataToPython()
        assert self.__DataList
        assert self.__DataDict

        print( "Export to Python not written yet!" )
        halt

        if not filepath:
            folder = os.path.join( os.path.split(self.__XMLFilepath)[0], "DerivedFiles/" )
            if not os.path.exists( folder ): os.mkdir( folder )
            filepath = os.path.join( folder, self._filenameBase + "_Tables.py" )
        if BibleOrgSysGlobals.verbosityLevel > 1: print( _("Exporting to {}…").format( filepath ) )
        with open( filepath, 'wt', encoding='utf-8' ) as myFile:
            myFile.write( "# {}\n#\n".format( filepath ) )
            myFile.write( "# This UTF-8 file was automatically generated by BibleReferencesLinks.py V{} on {}\n#\n".format( ProgVersion, datetime.now() ) )
            if self.titleString: myFile.write( "# {} data\n".format( self.titleString ) )
            if self.ProgVersion: myFile.write( "#  Version: {}\n".format( self.ProgVersion ) )
            if self.dateString: myFile.write( "#  Date: {}\n#\n".format( self.dateString ) )
            myFile.write( "#   {} {} loaded from the original XML file.\n#\n\n".format( len(self._XMLtree), self._treeTag ) )
            mostEntries = "0=referenceNumber (integer 1..255), 1=sourceComponent/BBB (3-uppercase characters)"
            dictInfo = { "referenceNumberDict":("referenceNumber (integer 1..255)","specified"),
                    "sourceComponentDict":("sourceComponent","specified"),
                    "sequenceList":("sourceComponent/BBB (3-uppercase characters)",""),
                    "initialAllAbbreviationsDict":("allAbbreviations", mostEntries) }
            for dictName,dictData in self.__DataList.items():
                exportPythonDictOrList( myFile, dictData, dictName, dictInfo[dictName][0], dictInfo[dictName][1] )
            myFile.write( "# end of {}".format( os.path.basename(filepath) ) )
    # end of BibleReferencesLinksConverter.exportDataToPython


    def exportDataToJSON( self, filepath=None ):
        """
        Dumps the imported data list and data dictionary as JSON text into a UTF-8 .json file.

        See http://en.wikipedia.org/wiki/JSON.

        If no filepath is given, the file is named after self._filenameBase
            and goes in a DerivedFiles/ folder beside the XML file (the folder is created if necessary).

        NOTE: The temporary debugging loops are still active, so every list entry,
            dictionary key and dictionary value is printed and dumped individually
            before its whole container is dumped -- all into the same file.
        NOTE(review): the containers presumably hold parsed verse key objects from importDataToPython
            -- whether json.dump can serialise those should be confirmed.
        """
        import json

        assert self._XMLtree
        self.importDataToPython() # Makes sure that the two containers below are filled
        assert self.__DataList
        assert self.__DataDict

        if not filepath:
            XMLFolder = os.path.split( self.__XMLFilepath )[0]
            folder = os.path.join( XMLFolder, "DerivedFiles/" )
            if not os.path.exists( folder ):
                os.mkdir( folder )
            filepath = os.path.join( folder, self._filenameBase + "_Tables.json" )
        if BibleOrgSysGlobals.verbosityLevel > 1:
            print( _("Exporting to {}…").format( filepath ) )

        with open( filepath, 'wt', encoding='utf-8' ) as JSONFile:
            # temp for debugging: dump each list entry by itself first
            for listEntry in self.__DataList:
                print( "Dumping something", listEntry )
                json.dump( listEntry, JSONFile, indent=2 )
            json.dump( self.__DataList, JSONFile, indent=2 )

            # temp for debugging: dump each dictionary key and value by itself first
            for dictKey,dictValue in self.__DataDict.items():
                print( "Dumping someKey", dictKey )
                json.dump( dictKey, JSONFile, indent=2 )
                print( "Dumping someItem", dictValue )
                json.dump( dictValue, JSONFile, indent=2 )
            json.dump( self.__DataDict, JSONFile, indent=2 )
    # end of BibleReferencesLinksConverter.exportDataToJSON


    def exportDataToC( self, filepath=None ):
        """
        Writes the information tables to a .h and .c files that can be included in c and c++ programs.

        NOTE: The (optional) filepath should not have the file extension specified -- this is added automatically.
        """
        def exportPythonDict( hFile, cFile, theDict, dictName, sortedBy, structure ):
            """ Exports theDict to the .h and .c files. """
            def convertEntry( entry ):
                """ Convert special characters in an entry… """
                result = ""
                if isinstance( entry, str ):
                    result = entry
                elif isinstance( entry, tuple ):
                    for field in entry:
                        if result: result += ", " # Separate the fields
                        if field is None: result += '""'
                        elif isinstance( field, str): result += '"' + str(field).replace('"','\\"') + '"'
                        elif isinstance( field, int): result += str(field)
                        elif isinstance( field, list): raise Exception( "Not written yet (list1)" )
                        else: logging.error( _("Cannot convert unknown field type {!r} in tuple entry {!r}").format( field, entry ) )
                elif isinstance( entry, dict ):
                    for key in sorted(entry.keys()):
                        field = entry[key]
                        if result: result += ", " # Separate the fields
                        if field is None: result += '""'
                        elif isinstance( field, str): result += '"' + str(field).replace('"','\\"') + '"'
                        elif isinstance( field, int): result += str(field)
                        elif isinstance( field, list): raise Exception( "Not written yet (list2)" )
                        else: logging.error( _("Cannot convert unknown field type {!r} in dict entry {!r}").format( field, entry ) )
                else:
                    logging.error( _("Can't handle this type of entry yet: {}").format( repr(entry) ) )
                return result
            # end of convertEntry

            for dictKey in theDict.keys(): # Have to iterate this :(
                fieldsCount = len( theDict[dictKey] ) + 1 # Add one since we include the key in the count
                break # We only check the first (random) entry we get

            #hFile.write( "typedef struct {}EntryStruct { {} } {}Entry;\n\n".format( dictName, structure, dictName ) )
            hFile.write( "typedef struct {}EntryStruct {{\n".format( dictName ) )
            for declaration in structure.split(';'):
                adjDeclaration = declaration.strip()
                if adjDeclaration: hFile.write( "    {};\n".format( adjDeclaration ) )
            hFile.write( "}} {}Entry;\n\n".format( dictName ) )

            cFile.write( "const static {}Entry\n {}[{}] = {{\n  // Fields ({}) are {}\n  // Sorted by {}\n".format( dictName, dictName, len(theDict), fieldsCount, structure, sortedBy ) )
            for dictKey in sorted(theDict.keys()):
                if isinstance( dictKey, str ):
                    cFile.write( "  {{\"{}\", {}}},\n".format( dictKey, convertEntry(theDict[dictKey]) ) )
                elif isinstance( dictKey, int ):
                    cFile.write( "  {{{}, {}}},\n".format( dictKey, convertEntry(theDict[dictKey]) ) )
                else:
                    logging.error( _("Can't handle this type of key data yet: {}").format( dictKey ) )
            cFile.write( "]}}; // {} ({} entries)\n\n".format( dictName, len(theDict) ) )
        # end of exportPythonDict


        assert self._XMLtree
        self.importDataToPython()
        assert self.__DataList

        print( "Export to C not written yet!" )
        halt

        if not filepath:
            folder = os.path.join( os.path.split(self.__XMLFilepath)[0], "DerivedFiles/" )
            if not os.path.exists( folder ): os.mkdir( folder )
            filepath = os.path.join( folder, self._filenameBase + "_Tables" )
        hFilepath = filepath + '.h'
        cFilepath = filepath + '.c'
        if BibleOrgSysGlobals.verbosityLevel > 1: print( _("Exporting to {}…").format( cFilepath ) ) # Don't bother telling them about the .h file
        ifdefName = self._filenameBase.upper() + "_Tables_h"

        with open( hFilepath, 'wt', encoding='utf-8' ) as myHFile, \
             open( cFilepath, 'wt', encoding='utf-8' ) as myCFile:
            myHFile.write( "// {}\n//\n".format( hFilepath ) )
            myCFile.write( "// {}\n//\n".format( cFilepath ) )
            lines = "// This UTF-8 file was automatically generated by BibleReferencesLinks.py V{} on {}\n//\n".format( ProgVersion, datetime.now() )
            myHFile.write( lines ); myCFile.write( lines )
            if self.titleString:
                lines = "// {} data\n".format( self.titleString )
                myHFile.write( lines ); myCFile.write( lines )
            if self.ProgVersion:
                lines = "//  Version: {}\n".format( self.ProgVersion )
                myHFile.write( lines ); myCFile.write( lines )
            if self.dateString:
                lines = "//  Date: {}\n//\n".format( self.dateString )
                myHFile.write( lines ); myCFile.write( lines )
            myCFile.write( "//   {} {} loaded from the original XML file.\n//\n\n".format( len(self._XMLtree), self._treeTag ) )
            myHFile.write( "\n#ifndef {}\n#define {}\n\n".format( ifdefName, ifdefName ) )
            myCFile.write( '#include "{}"\n\n'.format( os.path.basename(hFilepath) ) )

            CHAR = "const unsigned char"
            BYTE = "const int"
            dictInfo = {
                "referenceNumberDict":("referenceNumber (integer 1..255)",
                    "{} referenceNumber; {}* ByzantineAbbreviation; {}* CCELNumberString; {}* NETBibleAbbreviation; {}* OSISAbbreviation; {} USFMAbbreviation[3+1]; {} USFMNumberString[2+1]; {}* SBLAbbreviation; {}* SwordAbbreviation; {}* sourceReference; {}* numExpectedChapters; {}* possibleAlternativeBooks; {} sourceComponent[3+1];"
                   .format(BYTE, CHAR, CHAR, CHAR, CHAR, CHAR, CHAR, CHAR, CHAR, CHAR, CHAR, CHAR, CHAR ) ),
                "sourceComponentDict":("sourceComponent",
                    "{} sourceComponent[3+1]; {}* ByzantineAbbreviation; {}* CCELNumberString; {} referenceNumber; {}* NETBibleAbbreviation; {}* OSISAbbreviation; {} USFMAbbreviation[3+1]; {} USFMNumberString[2+1]; {}* SBLAbbreviation; {}* SwordAbbreviation; {}* sourceReference; {}* numExpectedChapters; {}* possibleAlternativeBooks;"
                   .format(CHAR, CHAR, CHAR, BYTE, CHAR, CHAR, CHAR, CHAR, CHAR, CHAR, CHAR, CHAR, CHAR ) ),
                "sequenceList":("sequenceList",),
                "CCELDict":("CCELNumberString", "{}* CCELNumberString; {} referenceNumber; {} sourceComponent[3+1];".format(CHAR,BYTE,CHAR) ),
                "initialAllAbbreviationsDict":("abbreviation", "{}* abbreviation; {} sourceComponent[3+1];".format(CHAR,CHAR) ) }

            for dictName,dictData in self.__DataList.items():
                exportPythonDict( myHFile, myCFile, dictData, dictName, dictInfo[dictName][0], dictInfo[dictName][1] )

            myHFile.write( "#endif // {}\n\n".format( ifdefName ) )
            myHFile.write( "// end of {}".format( os.path.basename(hFilepath) ) )
            myCFile.write( "// end of {}".format( os.path.basename(cFilepath) ) )