def __init__(self, node, graph, inherited_state=None, base="", options=None, rdfa_version=None):
    """
    Build the execution context (base, RDFa version, options, language, default namespace, CURIE handling)
    for one DOM node, either from scratch (top level) or by mixing the inherited state with the node's own
    attributes.

    @param node: the current DOM Node
    @param graph: the RDFLib Graph
    @keyword inherited_state: the state as inherited from upper layers. This inherited_state is mixed with the
    state information retrieved from the current node. When it is missing (or false) the context is built for
    the very top of the parsing.
    @type inherited_state: L{state.ExecutionContext}
    @keyword base: string denoting the base URI for the specific node. It is only consulted at the top level,
    and only when the node itself does not yield a non-empty base (C{<base href>} for the HTML family,
    C{@xml:base} for the host languages accepting it). The current XHTML+RDFa syntax does not allow the usage
    of C{@xml:base}, but SVG1.2 does, so this is necessary for SVG (and other possible XML dialects that
    accept C{@xml:base})
    @keyword options: invocation options, and references to warning graphs. Only consulted at the top level
    (a default C{Options} instance is created if it is missing); otherwise the options of the inherited state
    are used.
    @type options: L{Options<pyRdfa.options>}
    @keyword rdfa_version: the RDFa version to use at the top level; defaults to C{pyRdfa.rdfa_current_version}.
    A C{@version} attribute on the top node mentioning RDFa 1.0 or RDFa 1.1 overrides it. Ignored when an
    inherited state is present.
    """
    def remove_frag_id(uri):
        """
        The fragment ID for self.base must be removed. If the URI cannot be parsed, it is returned unchanged.
        """
        try:
            # To be on the safe side:-)
            t = urlparse(uri)
            return urlunparse((t[0], t[1], t[2], t[3], t[4], ""))
        except Exception:
            # Deliberate best effort (e.g. a malformed URI): keep the value as it was. Only 'Exception' is
            # caught so that KeyboardInterrupt/SystemExit are not swallowed.
            return uri

    # This is, conceptually, an additional class initialization, but it must be done run time,
    # otherwise import errors show up
    if not ExecutionContext._resource_type:
        ExecutionContext._resource_type = {
            "href": ExecutionContext._URI,
            "src": ExecutionContext._URI,
            "vocab": ExecutionContext._URI,

            "about": ExecutionContext._CURIEorURI,
            "resource": ExecutionContext._CURIEorURI,

            "rel": ExecutionContext._TERMorCURIEorAbsURI,
            "rev": ExecutionContext._TERMorCURIEorAbsURI,
            "datatype": ExecutionContext._TERMorCURIEorAbsURI,
            "typeof": ExecutionContext._TERMorCURIEorAbsURI,
            "property": ExecutionContext._TERMorCURIEorAbsURI,
            "role": ExecutionContext._TERMorCURIEorAbsURI,
        }

    #-----------------------------------------------------------------
    self.node = node

    #-----------------------------------------------------------------
    # Settling the base. In a generic XML, xml:base should be accepted at all levels (though this is not the
    # case in, say, XHTML...)
    # At the moment, it is invoked with a 'None' at the top level of parsing, that is
    # when the <base> element is looked for (for the HTML cases, that is)
    if inherited_state:
        self.rdfa_version = inherited_state.rdfa_version
        self.base = inherited_state.base
        self.options = inherited_state.options

        self.list_mapping = inherited_state.list_mapping
        self.new_list = False

        # for generic XML versions the xml:base attribute should be handled
        if self.options.host_language in accept_xml_base and node.hasAttribute("xml:base"):
            self.base = remove_frag_id(node.getAttribute("xml:base"))
    else:
        # this is the branch called from the very top
        self.list_mapping = ListStructure()
        self.new_list = True

        if rdfa_version is not None:
            self.rdfa_version = rdfa_version
        else:
            # Imported here, not at module level, like the original code (presumably to avoid import cycles)
            from pyRdfa import rdfa_current_version
            self.rdfa_version = rdfa_current_version

        # This value can be overwritten by a @version attribute
        if node.hasAttribute("version"):
            top_version = node.getAttribute("version")
            if "RDFa 1.0" in top_version or "RDFa1.0" in top_version:
                self.rdfa_version = "1.0"
            elif "RDFa 1.1" in top_version or "RDFa1.1" in top_version:
                self.rdfa_version = "1.1"

        # this is just to play safe. I believe this should actually not happen...
        if options is None:
            from pyRdfa import Options
            self.options = Options()
        else:
            self.options = options

        self.base = ""
        # handle the base element case for HTML
        if self.options.host_language in (HostLanguage.xhtml, HostLanguage.html5, HostLanguage.xhtml5):
            for base_element in node.getElementsByTagName("base"):
                if base_element.hasAttribute("href"):
                    self.base = remove_frag_id(base_element.getAttribute("href"))
                    # The first <base> carrying an @href settles the document base; any later one is
                    # ignored. (The loop used to end with a no-op 'continue', letting the last one win.)
                    break
        elif self.options.host_language in accept_xml_base and node.hasAttribute("xml:base"):
            self.base = remove_frag_id(node.getAttribute("xml:base"))

        # If no local setting for base occurs, the input argument has it
        if self.base == "":
            self.base = base

        # Perform an extra beautification in RDFLib
        if self.options.host_language in beautifying_prefixes:
            prefixes = beautifying_prefixes[self.options.host_language]
            for key in prefixes:
                graph.bind(key, prefixes[key])

        input_info = "Input Host Language:%s, RDFa version:%s, base:%s" % (
            self.options.host_language, self.rdfa_version, self.base)
        self.options.add_info(input_info)

    #-----------------------------------------------------------------
    # this will be used repeatedly, better store it once and for all...
    self.parsedBase = urlsplit(self.base)

    #-----------------------------------------------------------------
    # generate and store the local CURIE handling class instance
    self.term_or_curie = TermOrCurie(self, graph, inherited_state)

    #-----------------------------------------------------------------
    # Settling the language tags
    # @xml:lang has priority over @lang when both are present on the node
    # it is a bit messy: the three fundamental modes (xhtml, html, or xml) are all slightly different:-(
    # first get the inherited state's language, if any
    if inherited_state:
        self.lang = inherited_state.lang
    else:
        self.lang = None

    self.supress_lang = False

    if self.options.host_language in (HostLanguage.xhtml, HostLanguage.xhtml5, HostLanguage.html5):
        # we may have lang and xml:lang
        lang = node.getAttribute("lang").lower() if node.hasAttribute("lang") else None
        xmllang = node.getAttribute("xml:lang").lower() if node.hasAttribute("xml:lang") else None

        # First of all, set the value, if any. An attribute that is present but empty resets the
        # language to None instead of inheriting it.
        if xmllang is not None:
            # this has priority
            self.lang = xmllang or None
        elif lang is not None:
            self.lang = lang or None
        # Ideally, a warning should be generated if lang and xmllang are both present with different values. But
        # the HTML5 Parser does its magic by overriding a lang value if xmllang is present, so the potential
        # error situations are simply swallowed...
    elif self.options.host_language in accept_xml_lang and node.hasAttribute("xml:lang"):
        # An empty xml:lang resets the language to None
        self.lang = node.getAttribute("xml:lang").lower() or None

    #-----------------------------------------------------------------
    # Set the default namespace. Used when generating XML Literals
    if node.hasAttribute("xmlns"):
        self.defaultNS = node.getAttribute("xmlns")
    elif inherited_state and inherited_state.defaultNS is not None:
        self.defaultNS = inherited_state.defaultNS
    else:
        self.defaultNS = None