示例#1
0
    def __init__(self,
                 node,
                 graph,
                 inherited_state=None,
                 base="",
                 options=None,
                 rdfa_version=None):
        """
        Set up the execution context (base, RDFa version, options, list mapping,
        term/CURIE handler, language and default namespace) for one DOM node.

        When C{inherited_state} is given, the RDFa version, base, options and list
        mapping are taken from it; otherwise (the call made from the very top) they
        are established from the arguments and from the node itself.

        @param node: the current DOM Node
        @param graph: the RDFLib Graph; it is handed over to the term/CURIE handler and,
        at the top level, receives the "beautifying" prefix bindings of the host language
        @keyword inherited_state: the state as inherited
        from upper layers. This inherited_state is mixed with the state information
        retrieved from the current node.
        @type inherited_state: L{state.ExecutionContext}
        @keyword base: string denoting the base URI. It is only consulted at the top level
        (no C{inherited_state}), as a fallback when the node itself sets no base through an
        HTML C{<base href>} element or, for the host languages that accept it, C{@xml:base}.
        The current XHTML+RDFa syntax does not allow the usage of C{@xml:base}, but SVG1.2 does,
        so this is necessary for SVG (and other possible XML dialects that accept C{@xml:base})
        @keyword options: invocation options, and references to warning graphs; only used at
        the top level, where a default C{Options} instance is created if it is C{None}
        @type options: L{Options<pyRdfa.options>}
        @keyword rdfa_version: RDFa version to use at the top level; if C{None},
        C{pyRdfa.rdfa_current_version} is used. A C{@version} attribute on the node that
        mentions RDFa 1.0 or RDFa 1.1 overrides this value.
        """
        def remove_frag_id(uri):
            """
            Return C{uri} with its fragment ID removed (the fragment ID for self.base
            must be removed). If the URI cannot be parsed, it is returned unchanged.
            """
            try:
                # To be on the safe side:-)
                t = urlparse(uri)
                return urlunparse((t[0], t[1], t[2], t[3], t[4], ""))
            except Exception:
                # Deliberate best effort: an unparsable value is kept as is. Unlike a bare
                # 'except', this does not swallow KeyboardInterrupt/SystemExit.
                return uri

        # This is, conceptually, an additional class initialization, but it must be done run time, otherwise import errors show up
        if not ExecutionContext._resource_type:
            ExecutionContext._resource_type = {
                "href": ExecutionContext._URI,
                "src": ExecutionContext._URI,
                "vocab": ExecutionContext._URI,
                "about": ExecutionContext._CURIEorURI,
                "resource": ExecutionContext._CURIEorURI,
                "rel": ExecutionContext._TERMorCURIEorAbsURI,
                "rev": ExecutionContext._TERMorCURIEorAbsURI,
                "datatype": ExecutionContext._TERMorCURIEorAbsURI,
                "typeof": ExecutionContext._TERMorCURIEorAbsURI,
                "property": ExecutionContext._TERMorCURIEorAbsURI,
                "role": ExecutionContext._TERMorCURIEorAbsURI,
            }

        # The (X)HTML family of host languages; they share the <base> element and the
        # @lang/@xml:lang handling below
        html_hosts = (HostLanguage.xhtml, HostLanguage.html5,
                      HostLanguage.xhtml5)

        #-----------------------------------------------------------------
        self.node = node

        #-----------------------------------------------------------------
        # Settling the base. In a generic XML, xml:base should be accepted at all levels (though this is not the
        # case in, say, XHTML...)
        # At the moment, it is invoked with a 'None' at the top level of parsing, that is
        # when the <base> element is looked for (for the HTML cases, that is)
        if inherited_state:
            self.rdfa_version = inherited_state.rdfa_version
            self.base = inherited_state.base
            self.options = inherited_state.options

            # the list mapping object itself is shared with the inherited state
            self.list_mapping = inherited_state.list_mapping
            self.new_list = False

            # for generic XML versions the xml:base attribute should be handled
            if (self.options.host_language in accept_xml_base
                    and node.hasAttribute("xml:base")):
                self.base = remove_frag_id(node.getAttribute("xml:base"))
        else:
            # this is the branch called from the very top
            self.list_mapping = ListStructure()
            self.new_list = True

            if rdfa_version is not None:
                self.rdfa_version = rdfa_version
            else:
                # imported here rather than at module level (see the remark on import errors above)
                from pyRdfa import rdfa_current_version
                self.rdfa_version = rdfa_current_version

            # This value can be overwritten by a @version attribute
            if node.hasAttribute("version"):
                top_version = node.getAttribute("version")
                if "RDFa 1.0" in top_version or "RDFa1.0" in top_version:
                    self.rdfa_version = "1.0"
                elif "RDFa 1.1" in top_version or "RDFa1.1" in top_version:
                    self.rdfa_version = "1.1"

            # this is just to play safe. I believe this should actually not happen...
            if options is None:
                from pyRdfa import Options
                self.options = Options()
            else:
                self.options = options

            self.base = ""
            # handle the base element case for HTML
            if self.options.host_language in html_hosts:
                for base_element in node.getElementsByTagName("base"):
                    if base_element.hasAttribute("href"):
                        self.base = remove_frag_id(
                            base_element.getAttribute("href"))
                        # Only the first <base> element carrying an @href counts;
                        # any further ones are ignored.
                        break
            elif (self.options.host_language in accept_xml_base
                  and node.hasAttribute("xml:base")):
                self.base = remove_frag_id(node.getAttribute("xml:base"))

            # If no local setting for base occurs, the input argument has it
            if self.base == "":
                self.base = base

            # Perform an extra beautification in RDFLib
            if self.options.host_language in beautifying_prefixes:
                prefixes = beautifying_prefixes[self.options.host_language]
                for prefix, uri in prefixes.items():
                    graph.bind(prefix, uri)

            input_info = "Input Host Language:%s, RDFa version:%s, base:%s" % (
                self.options.host_language, self.rdfa_version, self.base)
            self.options.add_info(input_info)

        #-----------------------------------------------------------------
        # this will be used repeatedly, better store it once and for all...
        self.parsedBase = urlsplit(self.base)

        #-----------------------------------------------------------------
        # generate and store the local CURIE handling class instance
        self.term_or_curie = TermOrCurie(self, graph, inherited_state)

        #-----------------------------------------------------------------
        # Settling the language tags
        # @xml:lang has priority over @lang
        # it is a bit messy: the three fundamental modes (xhtml, html, or xml) are all slightly different:-(
        # first get the inherited state's language, if any
        if inherited_state:
            self.lang = inherited_state.lang
        else:
            self.lang = None

        self.supress_lang = False

        if self.options.host_language in html_hosts:
            # we may have lang and xml:lang
            if node.hasAttribute("lang"):
                lang = node.getAttribute("lang").lower()
            else:
                lang = None
            if node.hasAttribute("xml:lang"):
                xmllang = node.getAttribute("xml:lang").lower()
            else:
                xmllang = None
            # First of all, set the value, if any. An attribute that is present but empty
            # resets the language to None instead of keeping the inherited one.
            if xmllang is not None:
                # this has priority
                self.lang = xmllang or None
            elif lang is not None:
                self.lang = lang or None
            # Ideally, a warning should be generated if lang and xmllang are both present with different values. But
            # the HTML5 Parser does its magic by overriding a lang value if xmllang is present, so the potential
            # error situations are simply swallowed...

        elif (self.options.host_language in accept_xml_lang
              and node.hasAttribute("xml:lang")):
            # an empty @xml:lang resets the language to None
            self.lang = node.getAttribute("xml:lang").lower() or None

        #-----------------------------------------------------------------
        # Set the default namespace. Used when generating XML Literals
        if node.hasAttribute("xmlns"):
            self.defaultNS = node.getAttribute("xmlns")
        elif inherited_state and inherited_state.defaultNS is not None:
            self.defaultNS = inherited_state.defaultNS
        else:
            self.defaultNS = None