Пример #1
0
def check_term(conn, term, predicates):
    """Compare the RDFa emitted for ``term`` with a stored Turtle fixture.

    The HTML returned by ``gizmos.tree.build_tree`` for the ``"obi"``
    database is parsed into a DOM, its RDFa is collected into an RDFLib
    graph, and that graph is passed to ``compare_graphs`` together with
    the graph loaded from ``tests/resources``.

    :param conn: connection object; only ``conn.cursor()`` is used here
    :param term: term identifier, also interpolated into the fixture name
    :param predicates: forwarded as ``predicate_ids``; when truthy the
        ``-predicates`` variant of the fixture is loaded
    """
    cursor = conn.cursor()
    tree_html = gizmos.tree.build_tree(
        cursor, "obi", term, predicate_ids=predicates)

    # Parse the HTML into a DOM document and take its root element
    dom_builder = html5lib.treebuilders.getTreeBuilder("dom")
    document = html5lib.HTMLParser(tree=dom_builder).parse(tree_html)
    root = document.documentElement

    # Set up the initial processing state (from pyRdfa)
    actual = Graph()
    option_settings = {
        "output_default_graph": True,
        "output_processor_graph": True,
        "space_preserve": True,
        "transformers": [],
        "embedded_rdf": True,
        "vocab_expansion": False,
        "vocab_cache": True,
        "vocab_cache_report": False,
        "refresh_vocab_cache": False,
        "check_lite": False,
        "experimental_features": True,
    }
    context = ExecutionContext(
        root,
        actual,
        base="http://purl.obolibrary.org/obo/",
        options=Options(**option_settings),
        rdfa_version="1.1",
    )

    # Walk the DOM recursively, adding the RDFa triples to ``actual``
    parse_one_node(root, actual, None, context, [])

    # Pick the fixture that matches the way the tree was built
    if predicates:
        fixture_path = f"tests/resources/obi-tree-{term}-predicates.ttl"
    else:
        fixture_path = f"tests/resources/obi-tree-{term}.ttl"

    expected = Graph()
    expected.parse(fixture_path, format="turtle")

    compare_graphs(actual, expected)
Пример #2
0
def _parse_1_1(node, graph, parent_object, incoming_state, parent_incomplete_triples):
    """The (recursive) step of handling a single node. See the
    U{RDFa syntax document<http://www.w3.org/TR/rdfa-syntax>} for further details.

    This is the RDFa 1.1 (and higher) version.

    @param node: the DOM node to handle
    @param graph: the RDF graph
    @type graph: RDFLib's Graph object instance
    @param parent_object: the parent's object, as an RDFLib URIRef
    @param incoming_state: the inherited state (namespaces, lang, etc)
    @type incoming_state: L{state.ExecutionContext}
    @param parent_incomplete_triples: list of hanging triples (the missing resource set to None) to be handled (or not)
    by the current node.
    @return: always None; the results are the triples added to C{graph} and the list
    entries registered on the states
    """

    # Update the state. This means, for example, the possible local settings of
    # namespaces and lang
    state = ExecutionContext(node, graph, inherited_state=incoming_state)

    #---------------------------------------------------------------------------------
    # Handle the special case for embedded RDF, eg, in SVG1.2.
    # This may add some triples to the target graph that do not originate from RDFa parsing.
    # If the function returns True, that means that an rdf:RDF has been found. No
    # RDFa parsing should be done on that subtree, so we simply return...
    if (state.options.host_language in accept_embedded_rdf
            and node.nodeType == node.ELEMENT_NODE
            and handle_embeddedRDF(node, graph, state)):
        return

    #---------------------------------------------------------------------------------
    # calling the host language specific massaging of the DOM
    if state.options.host_language in host_dom_transforms and node.nodeType == node.ELEMENT_NODE:
        for func in host_dom_transforms[state.options.host_language]:
            func(node, state)

    #---------------------------------------------------------------------------------
    # First, let us check whether there is anything to do at all. Ie,
    # whether there is any relevant RDFa specific attribute on the element
    #
    if not has_one_of_attributes(node, "href", "resource", "about", "property", "rel", "rev", "typeof", "src", "vocab", "prefix"):
        # nop, there is nothing to do here, just go down the tree and return...
        for n in node.childNodes:
            if n.nodeType == node.ELEMENT_NODE:
                parse_one_node(n, graph, parent_object, state, parent_incomplete_triples)
        return

    #-----------------------------------------------------------------
    # The goal is to establish the subject and object for local processing
    # The behaviour is slightly different depending on the presence or not
    # of the @rel/@rev attributes
    current_subject = None
    current_object = None
    typed_resource = None

    if has_one_of_attributes(node, "rel", "rev"):
        # in this case there is the notion of 'left' and 'right' of @rel/@rev
        # in establishing the new Subject and the objectResource

        # set first the subject
        if node.hasAttribute("about"):
            current_subject = state.getURI("about")
            if node.hasAttribute("typeof"):
                typed_resource = current_subject

        # getURI may return None in case of an illegal CURIE, so
        # we have to be careful here, not use only an 'else'
        if current_subject is None:
            current_subject = parent_object
        else:
            state.reset_list_mapping(origin=current_subject)

        # set the object resource
        current_object = state.getResource("resource", "href", "src")

        if node.hasAttribute("typeof") and not node.hasAttribute("about"):
            if current_object is None:
                current_object = BNode()
            typed_resource = current_object

        if not node.hasAttribute("inlist") and current_object is not None:
            # In this case the newly defined object is, in fact, the head of the list
            # just reset the whole thing.
            state.reset_list_mapping(origin=current_object)

    elif node.hasAttribute("property") and not has_one_of_attributes(node, "content", "datatype"):
        # this is the case when the property may take hold of @src and friends...
        if node.hasAttribute("about"):
            current_subject = state.getURI("about")
            if node.hasAttribute("typeof"):
                typed_resource = current_subject

        # getURI may return None in case of an illegal CURIE, so
        # we have to be careful here, not use only an 'else'
        if current_subject is None:
            current_subject = parent_object
        else:
            state.reset_list_mapping(origin=current_subject)

        if typed_resource is None and node.hasAttribute("typeof"):
            typed_resource = state.getResource("resource", "href", "src")
            if typed_resource is None:
                typed_resource = BNode()
            current_object = typed_resource
        else:
            current_object = current_subject

    else:
        # in this case all the various 'resource' setting attributes
        # behave identically, though they also have their own priority
        current_subject = state.getResource("about", "resource", "href", "src")

        # getResource may return None in case of an illegal CURIE, so
        # we have to be careful here, not use only an 'else'
        if current_subject is None:
            if node.hasAttribute("typeof"):
                current_subject = BNode()
            else:
                current_subject = parent_object
        else:
            state.reset_list_mapping(origin=current_subject)

        # in this case no non-literal triples will be generated, so the
        # only role of the current_object Resource is to be transferred to
        # the children node
        current_object = current_subject
        if node.hasAttribute("typeof"):
            typed_resource = current_subject

    # ---------------------------------------------------------------------
    # The possible typeof indicates a number of type statements on the typed resource.
    # getURI("typeof") is still called when there is no typed resource, exactly
    # as before, so whatever it does internally is unaffected.
    for defined_type in state.getURI("typeof"):
        if typed_resource:
            graph.add((typed_resource, ns_rdf["type"], defined_type))

    # ---------------------------------------------------------------------
    # In case of @rel/@rev, either triples or incomplete triples are generated
    # the (possible) incomplete triples are collected, to be forwarded to the children
    incomplete_triples = []
    for prop in state.getURI("rel"):
        if not isinstance(prop, BNode):
            if node.hasAttribute("inlist"):
                if current_object is not None:
                    state.add_to_list_mapping(prop, current_object)
                else:
                    # (None, prop, None) encodes a hanging @rel for a list
                    incomplete_triples.append((None, prop, None))
            else:
                theTriple = (current_subject, prop, current_object)
                if current_object is not None:
                    graph.add(theTriple)
                else:
                    incomplete_triples.append(theTriple)
        else:
            state.options.add_warning(err_no_blank_node % "rel", warning_type=IncorrectBlankNodeUsage, node=node.nodeName)

    for prop in state.getURI("rev"):
        if not isinstance(prop, BNode):
            theTriple = (current_object, prop, current_subject)
            if current_object is not None:
                graph.add(theTriple)
            else:
                incomplete_triples.append(theTriple)
        else:
            state.options.add_warning(err_no_blank_node % "rev", warning_type=IncorrectBlankNodeUsage, node=node.nodeName)

    # ----------------------------------------------------------------------
    # Generation of the literal values; current_subject is the subject.
    if node.hasAttribute("property"):
        ProcessProperty(node, graph, current_subject, state, typed_resource).generate_1_1()

    # ----------------------------------------------------------------------
    # Setting the current object to a bnode is setting up a possible resource
    # for the incomplete triples downwards
    if current_object is None:
        object_to_children = BNode()
    else:
        object_to_children = current_object

    #-----------------------------------------------------------------------
    # Here is the recursion step for all the children
    for n in node.childNodes:
        if n.nodeType == node.ELEMENT_NODE:
            _parse_1_1(n, graph, object_to_children, state, incomplete_triples)

    # ---------------------------------------------------------------------
    # At this point, the parent's incomplete triples may be completed
    for (s, p, o) in parent_incomplete_triples:
        if s is None and o is None:
            # This is an encoded version of a hanging rel for a collection:
            incoming_state.add_to_list_mapping(p, current_subject)
        else:
            if s is None:
                s = current_subject
            if o is None:
                o = current_subject
            graph.add((s, p, o))

    # Generate the lists, if any and if this is the level where a new list was originally created
    if state.new_list and not state.list_empty():
        for prop in state.get_list_props():
            vals = state.get_list_value(prop)
            # One fresh node per value, closed by ns_rdf["nil"]
            heads = [BNode() for _ in vals] + [ns_rdf["nil"]]
            for head, next_head, val in zip(heads, heads[1:], vals):
                graph.add((head, ns_rdf["first"], val))
                graph.add((head, ns_rdf["rest"], next_head))
            # Anchor the list
            graph.add((state.get_list_origin(), prop, heads[0]))

    # -------------------------------------------------------------------
    # This should be it...
    # -------------------------------------------------------------------
    return
Пример #3
0
def _parse_1_1(node, graph, parent_object, incoming_state,
               parent_incomplete_triples):
    """The (recursive) step of handling a single node. See the
    U{RDFa syntax document<http://www.w3.org/TR/rdfa-syntax>} for further details.

    This is the RDFa 1.1 (and higher) version.

    @param node: the DOM node to handle
    @param graph: the RDF graph
    @type graph: RDFLib's Graph object instance
    @param parent_object: the parent's object, as an RDFLib URIRef
    @param incoming_state: the inherited state (namespaces, lang, etc)
    @type incoming_state: L{state.ExecutionContext}
    @param parent_incomplete_triples: list of hanging triples (the missing resource set to None) to be handled (or not)
    by the current node.
    @return: always None; the results are the triples added to C{graph} and the list
    entries registered on the states
    """

    # Update the state. This means, for example, the possible local settings of
    # namespaces and lang
    state = ExecutionContext(node, graph, inherited_state=incoming_state)

    #---------------------------------------------------------------------------------
    # Handle the special case for embedded RDF, eg, in SVG1.2.
    # This may add some triples to the target graph that do not originate from RDFa parsing.
    # If the function returns True, that means that an rdf:RDF has been found. No
    # RDFa parsing should be done on that subtree, so we simply return...
    if (state.options.host_language in accept_embedded_rdf
            and node.nodeType == node.ELEMENT_NODE
            and handle_embeddedRDF(node, graph, state)):
        return

    #---------------------------------------------------------------------------------
    # calling the host language specific massaging of the DOM
    if (state.options.host_language in host_dom_transforms
            and node.nodeType == node.ELEMENT_NODE):
        for func in host_dom_transforms[state.options.host_language]:
            func(node, state)

    #---------------------------------------------------------------------------------
    # First, let us check whether there is anything to do at all. Ie,
    # whether there is any relevant RDFa specific attribute on the element
    #
    if not has_one_of_attributes(node, "href", "resource", "about", "property",
                                 "rel", "rev", "typeof", "src", "vocab",
                                 "prefix"):
        # nop, there is nothing to do here, just go down the tree and return...
        for n in node.childNodes:
            if n.nodeType == node.ELEMENT_NODE:
                parse_one_node(n, graph, parent_object, state,
                               parent_incomplete_triples)
        return

    #-----------------------------------------------------------------
    # The goal is to establish the subject and object for local processing
    # The behaviour is slightly different depending on the presence or not
    # of the @rel/@rev attributes
    current_subject = None
    current_object = None
    typed_resource = None

    if has_one_of_attributes(node, "rel", "rev"):
        # in this case there is the notion of 'left' and 'right' of @rel/@rev
        # in establishing the new Subject and the objectResource

        # set first the subject
        if node.hasAttribute("about"):
            current_subject = state.getURI("about")
            if node.hasAttribute("typeof"):
                typed_resource = current_subject

        # getURI may return None in case of an illegal CURIE, so
        # we have to be careful here, not use only an 'else'
        if current_subject is None:
            current_subject = parent_object
        else:
            state.reset_list_mapping(origin=current_subject)

        # set the object resource
        current_object = state.getResource("resource", "href", "src")

        if node.hasAttribute("typeof") and not node.hasAttribute("about"):
            if current_object is None:
                current_object = BNode()
            typed_resource = current_object

        if not node.hasAttribute("inlist") and current_object is not None:
            # In this case the newly defined object is, in fact, the head of the list
            # just reset the whole thing.
            state.reset_list_mapping(origin=current_object)

    elif node.hasAttribute("property") and not has_one_of_attributes(
            node, "content", "datatype"):
        # this is the case when the property may take hold of @src and friends...
        if node.hasAttribute("about"):
            current_subject = state.getURI("about")
            if node.hasAttribute("typeof"):
                typed_resource = current_subject

        # getURI may return None in case of an illegal CURIE, so
        # we have to be careful here, not use only an 'else'
        if current_subject is None:
            current_subject = parent_object
        else:
            state.reset_list_mapping(origin=current_subject)

        if typed_resource is None and node.hasAttribute("typeof"):
            typed_resource = state.getResource("resource", "href", "src")
            if typed_resource is None:
                typed_resource = BNode()
            current_object = typed_resource
        else:
            current_object = current_subject

    else:
        # in this case all the various 'resource' setting attributes
        # behave identically, though they also have their own priority
        current_subject = state.getResource("about", "resource", "href", "src")

        # getResource may return None in case of an illegal CURIE, so
        # we have to be careful here, not use only an 'else'
        if current_subject is None:
            if node.hasAttribute("typeof"):
                current_subject = BNode()
            else:
                current_subject = parent_object
        else:
            state.reset_list_mapping(origin=current_subject)

        # in this case no non-literal triples will be generated, so the
        # only role of the current_object Resource is to be transferred to
        # the children node
        current_object = current_subject
        if node.hasAttribute("typeof"):
            typed_resource = current_subject

    # ---------------------------------------------------------------------
    # The possible typeof indicates a number of type statements on the typed resource.
    # getURI("typeof") is still called when there is no typed resource, exactly
    # as before, so whatever it does internally is unaffected.
    for defined_type in state.getURI("typeof"):
        if typed_resource:
            graph.add((typed_resource, ns_rdf["type"], defined_type))

    # ---------------------------------------------------------------------
    # In case of @rel/@rev, either triples or incomplete triples are generated
    # the (possible) incomplete triples are collected, to be forwarded to the children
    incomplete_triples = []
    for prop in state.getURI("rel"):
        if not isinstance(prop, BNode):
            if node.hasAttribute("inlist"):
                if current_object is not None:
                    state.add_to_list_mapping(prop, current_object)
                else:
                    # (None, prop, None) encodes a hanging @rel for a list
                    incomplete_triples.append((None, prop, None))
            else:
                theTriple = (current_subject, prop, current_object)
                if current_object is not None:
                    graph.add(theTriple)
                else:
                    incomplete_triples.append(theTriple)
        else:
            state.options.add_warning(err_no_blank_node % "rel",
                                      warning_type=IncorrectBlankNodeUsage,
                                      node=node.nodeName)

    for prop in state.getURI("rev"):
        if not isinstance(prop, BNode):
            theTriple = (current_object, prop, current_subject)
            if current_object is not None:
                graph.add(theTriple)
            else:
                incomplete_triples.append(theTriple)
        else:
            state.options.add_warning(err_no_blank_node % "rev",
                                      warning_type=IncorrectBlankNodeUsage,
                                      node=node.nodeName)

    # ----------------------------------------------------------------------
    # Generation of the literal values; current_subject is the subject.
    if node.hasAttribute("property"):
        ProcessProperty(node, graph, current_subject, state,
                        typed_resource).generate_1_1()

    # ----------------------------------------------------------------------
    # Setting the current object to a bnode is setting up a possible resource
    # for the incomplete triples downwards
    if current_object is None:
        object_to_children = BNode()
    else:
        object_to_children = current_object

    #-----------------------------------------------------------------------
    # Here is the recursion step for all the children
    for n in node.childNodes:
        if n.nodeType == node.ELEMENT_NODE:
            _parse_1_1(n, graph, object_to_children, state, incomplete_triples)

    # ---------------------------------------------------------------------
    # At this point, the parent's incomplete triples may be completed
    for (s, p, o) in parent_incomplete_triples:
        if s is None and o is None:
            # This is an encoded version of a hanging rel for a collection:
            incoming_state.add_to_list_mapping(p, current_subject)
        else:
            if s is None:
                s = current_subject
            if o is None:
                o = current_subject
            graph.add((s, p, o))

    # Generate the lists, if any and if this is the level where a new list was originally created
    if state.new_list and not state.list_empty():
        for prop in state.get_list_props():
            vals = state.get_list_value(prop)
            # One fresh node per value, closed by ns_rdf["nil"]
            heads = [BNode() for _ in vals] + [ns_rdf["nil"]]
            for i, val in enumerate(vals):
                graph.add((heads[i], ns_rdf["first"], val))
                graph.add((heads[i], ns_rdf["rest"], heads[i + 1]))
            # Anchor the list
            graph.add((state.get_list_origin(), prop, heads[0]))

    # -------------------------------------------------------------------
    # This should be it...
    # -------------------------------------------------------------------
    return