Exemplo n.º 1
0
	def rdf_from_sources(self, names, outputFormat = "turtle", rdfOutput = False) :
		"""
		Extract an RDF graph from a list of RDFa sources and serialize them in one graph. The sources are parsed, the RDF
		extracted, and serialization is done in the specified format.
		@param names: list of sources, each can be a URI, a file name, or a file-like object
		@keyword outputFormat: serialization format. Can be one of "turtle", "n3", "xml", "pretty-xml", "nt", "json" or "json-ld". "turtle" and "n3", "xml" and "pretty-xml", and "json" and "json-ld" are synonyms, respectively. Note that the JSON-LD serialization works with RDFLib 3.* only.
		@keyword rdfOutput: controls what happens in case an exception is raised. If the value is False, the caller is responsible for handling it; otherwise a graph is returned with an error message included in the processor graph
		@type rdfOutput: boolean
		@return: a serialized RDF Graph
		@rtype: string
		"""
		# MyGraph is preferred because it gives access to the various, non-standard serializations.
		# If it cannot be used (typically because the extras are not installed), fall back to the
		# standard rdflib Graph.
		try :
			from pyRdfaExtras import MyGraph
			graph = MyGraph()
		except Exception :
			# Deliberately "Exception" rather than a bare except: the fallback stays best-effort
			# for any ordinary failure, but KeyboardInterrupt and SystemExit are not swallowed.
			graph = Graph()

		graph.bind("xsd", Namespace('http://www.w3.org/2001/XMLSchema#'))
		# All sources are accumulated into the same graph; the value of rdfOutput determines
		# the reaction on exceptions inside graph_from_source.
		for name in names :
			self.graph_from_source(name, graph, rdfOutput)
		return graph.serialize(format=outputFormat)
Exemplo n.º 2
0
    def rdf_from_sources(self, names, outputFormat="turtle", rdfOutput=False):
        """
        Extract an RDF graph from a list of RDFa sources and serialize them in one graph. The sources are parsed, the RDF
        extracted, and serialization is done in the specified format.
        @param names: list of sources, each can be a URI, a file name, or a file-like object
        @keyword outputFormat: serialization format. Can be one of "turtle", "n3", "xml", "pretty-xml", "nt", "json" or "json-ld". "turtle" and "n3", "xml" and "pretty-xml", and "json" and "json-ld" are synonyms, respectively. Note that the JSON-LD serialization works with RDFLib 3.* only.
        @keyword rdfOutput: controls what happens in case an exception is raised. If the value is False, the caller is responsible for handling it; otherwise a graph is returned with an error message included in the processor graph
        @type rdfOutput: boolean
        @return: a serialized RDF Graph
        @rtype: string
        """
        # MyGraph is preferred because it gives access to the various, non-standard serializations.
        # If it cannot be used (typically because the extras are not installed), fall back to the
        # standard rdflib Graph.
        try:
            from pyRdfaExtras import MyGraph
            graph = MyGraph()
        except Exception:
            # Deliberately "Exception" rather than a bare except: the fallback stays best-effort
            # for any ordinary failure, but KeyboardInterrupt and SystemExit are not swallowed.
            graph = Graph()

        # NOTE(review): the explicit "xsd" prefix binding below is disabled in this variant;
        # the reason is not visible here -- confirm before re-enabling or deleting it.
        # graph.bind("xsd", Namespace('http://www.w3.org/2001/XMLSchema#'))

        # All sources are accumulated into the same graph; the value of rdfOutput determines
        # the reaction on exceptions inside graph_from_source.
        for name in names:
            self.graph_from_source(name, graph, rdfOutput)
        return graph.serialize(format=outputFormat)
Exemplo n.º 3
0
    def rdf_from_sources(self,
                         names,
                         outputFormat="pretty-xml",
                         rdfOutput=False):
        """
        Extract an RDF graph from a list of RDFa sources and serialize them in one graph. The sources are parsed, the RDF
        extracted, and serialization is done in the specified format.
        @param names: list of sources, each can be a URI, a file name, or a file-like object
        @keyword outputFormat: serialization format. Can be one of "turtle", "n3", "xml", "pretty-xml", "nt". "xml" and "pretty-xml", as well as "turtle" and "n3" are synonyms.
        @keyword rdfOutput: forwarded unchanged to graph_from_source for every source, where it determines the reaction on exceptions
        @return: a serialized RDF Graph
        @rtype: string
        """
        # Prefer the MyGraph class of the optional pyRdfaExtras package; if it cannot be used,
        # fall back to the plain Graph.
        try:
            from pyRdfaExtras import MyGraph
            graph = MyGraph()
        except Exception:
            # Deliberately "Exception" rather than a bare except: the fallback stays best-effort
            # for any ordinary failure, but KeyboardInterrupt and SystemExit are not swallowed.
            graph = Graph()

        # Register every prefix of the module-level _bindings table on the output graph
        for prefix in _bindings:
            graph.bind(prefix, Namespace(_bindings[prefix]))

        # All sources are accumulated into the same graph; the value of rdfOutput determines
        # the reaction on exceptions...
        for name in names:
            self.graph_from_source(name, graph, rdfOutput)
        return graph.serialize(format=outputFormat)
Exemplo n.º 4
0
	def rdf_from_sources(self, names, outputFormat = "turtle", rdfOutput = False) :
		"""
		Extract an RDF graph from a list of RDFa sources and serialize them in one graph. The sources are parsed, the RDF
		extracted, and serialization is done in the specified format.
		@param names: list of sources, each can be a URI, a file name, or a file-like object
		@keyword outputFormat: serialization format. Can be one of "turtle", "n3", "xml", "pretty-xml", "nt". "xml" and "pretty-xml", as well as "turtle" and "n3" are synonyms.
		@keyword rdfOutput: forwarded unchanged to graph_from_source for every source, where it determines the reaction on exceptions
		@return: a serialized RDF Graph
		@rtype: string
		"""
		# Prefer the MyGraph class of the optional pyRdfaExtras package; if it cannot be used,
		# fall back to the plain Graph.
		try :
			from pyRdfaExtras import MyGraph
			graph = MyGraph()
		except Exception :
			# Deliberately "Exception" rather than a bare except: the fallback stays best-effort
			# for any ordinary failure, but KeyboardInterrupt and SystemExit are not swallowed.
			graph = Graph()

		# Register every prefix of the module-level _bindings table on the output graph
		for prefix in _bindings :
			graph.bind(prefix, Namespace(_bindings[prefix]))

		# All sources are accumulated into the same graph; the value of rdfOutput determines
		# the reaction on exceptions...
		for name in names :
			self.graph_from_source(name, graph, rdfOutput)
		return graph.serialize(format=outputFormat)
Exemplo n.º 5
0
    def __init__(self,
                 uri,
                 base,
                 media_type,
                 vocab_expansion=False,
                 rdfa_lite=False,
                 embedded_rdf=False):
        """
        Store the settings, create the two empty result graphs and build the DOM tree
        that serves as the scaffold of the HTML output.

        @param uri: the URI for the content to be analyzed
        @type uri: file-like object (e.g., when content goes through an HTTP Post) or a string
        @param base: the base URI for the generated RDF
        @type base: string
        @param media_type: Media Type, see the media type management of pyRdfa. If "", the distiller will try to find the media type out.
        @type media_type: pyRdfa.host.MediaTypes value
        @keyword vocab_expansion: stored as-is on the instance; presumably switches vocabulary expansion on -- confirm against the code using it
        @keyword rdfa_lite: stored as-is on the instance; presumably switches RDFa 1.1 Lite checking on -- confirm against the code using it
        @keyword embedded_rdf: stored as-is on the instance; presumably controls handling of embedded RDF -- confirm against the code using it
        """
        # Result containers: one graph for the extracted content, one for processor messages
        self.default_graph = Graph()
        self.processor_graph = Graph()

        # Settings, kept verbatim
        self.uri = uri
        self.base = base
        self.media_type = media_type
        self.vocab_expansion = vocab_expansion
        self.rdfa_lite = rdfa_lite
        self.embedded_rdf = embedded_rdf

        # Build the output scaffold: the page template with today's ISO date filled in,
        # parsed into a DOM tree
        dom_builder = html5lib.treebuilders.getTreeBuilder("dom")
        html_parser = html5lib.HTMLParser(tree=dom_builder)
        page_source = html_page % date.today().isoformat()
        self.domtree = html_parser.parse(page_source)

        def first_with_id(tag_name, wanted_id):
            # First element of the given tag whose "id" attribute equals wanted_id, else None
            for node in self.domtree.getElementsByTagName(tag_name):
                if node.hasAttribute("id") and node.getAttribute("id") == wanted_id:
                    return node
            return None

        # Placeholder for the warning/error content; the attribute is only set when found
        message_node = first_with_id("div", "Message")
        if message_node is not None:
            self.message = message_node

        # Placeholder for the turtle output content; the attribute is only set when found
        output_node = first_with_id("pre", "output")
        if output_node is not None:
            self.code = output_node

        self.errors = Errors(self)
Exemplo n.º 6
0
class Validator:
    """
    Shell around the distiller and the error message management.
    @ivar default_graph: default graph for the results
    @ivar processor_graph: processor graph (ie, errors and warnings)
    @ivar uri: file like object or URI of the source
    @ivar base: base value for the generated RDF output
    @ivar media_type: media type of the source
    @ivar vocab_expansion: whether vocabulary expansion should occur or not
    @ivar rdfa_lite: whether RDFa 1.1 Lite should be checked
    @ivar embedded_rdf: passed to the processor options as "embedded_rdf" (earlier documentation called this "hturtle": whether the embedded turtle should be included in the output)
    @ivar domtree: the Document Node of the final domtree where the final HTML code should be added
    @ivar message: the Element Node in the final DOM Tree where the error/warning messages should be added
    @ivar code: the Element Node in the final DOM Tree where the generated code should be added
    @ivar errors: separate class instance to generate the error code
    @type errors: L{Errors}
    @ivar processor: the pyRdfa instance used for the extraction; only set once L{parse} has run
    """
    def __init__(self,
                 uri,
                 base,
                 media_type,
                 vocab_expansion=False,
                 rdfa_lite=False,
                 embedded_rdf=False):
        """
        @param uri: the URI for the content to be analyzed
        @type uri: file-like object (e.g., when content goes through an HTTP Post) or a string
        @param base: the base URI for the generated RDF
        @type base: string
        @param media_type: Media Type, see the media type management of pyRdfa. If "", the distiller will try to find the media type out.
        @type media_type: pyRdfa.host.MediaTypes value
        @keyword vocab_expansion: whether vocabulary expansion should occur or not
        @keyword rdfa_lite: whether RDFa 1.1 Lite should be checked (adds the lite_prune transformer in L{parse})
        @keyword embedded_rdf: value handed to the processor options as "embedded_rdf"
        """
        # Create the graphs into which the content is put
        self.default_graph = Graph()
        self.processor_graph = Graph()
        self.uri = uri
        self.base = base
        self.media_type = media_type
        self.embedded_rdf = embedded_rdf
        self.rdfa_lite = rdfa_lite
        self.vocab_expansion = vocab_expansion

        # Get the DOM tree that will be the scaffold for the output; the page template
        # receives today's date in ISO format
        parser = html5lib.HTMLParser(
            tree=html5lib.treebuilders.getTreeBuilder("dom"))
        self.domtree = parser.parse(html_page % date.today().isoformat())

        # find the warning/error content (first <div id="Message">)
        for div in self.domtree.getElementsByTagName("div"):
            if div.hasAttribute("id") and div.getAttribute("id") == "Message":
                self.message = div
                break
        # find the turtle output content (first <pre id="output">)
        for pre in self.domtree.getElementsByTagName("pre"):
            if pre.hasAttribute("id") and pre.getAttribute("id") == "output":
                self.code = pre
                break

        self.errors = Errors(self)

    # end __init__

    def parse(self):
        """
        Parse the RDFa input and store the processor and default graphs. The processor
        instance itself is kept in C{self.processor} for the error messages.
        """
        transformers = []
        if self.rdfa_lite:
            # Imported lazily: only needed when RDFa Lite checking was requested
            from pyRdfa.transform.lite import lite_prune
            transformers.append(lite_prune)

        options = Options(output_default_graph=True,
                          output_processor_graph=True,
                          transformers=transformers,
                          vocab_expansion=self.vocab_expansion,
                          embedded_rdf=self.embedded_rdf,
                          add_informational_messages=True)
        processor = pyRdfa(options=options,
                           base=self.base,
                           media_type=self.media_type)
        # rdfOutput=True: problems are reported through the processor graph
        processor.graph_from_source(self.uri,
                                    graph=self.default_graph,
                                    pgraph=self.processor_graph,
                                    rdfOutput=True)
        # Extracting some parameters for the error messages
        self.processor = processor

    def complete_DOM(self):
        """
        Add the generated graph, in turtle encoding, as well as the error messages, to the final DOM tree
        """
        # Add the RDF code in the DOM tree
        outp = self.default_graph.serialize(format="turtle")
        # Depending on the RDFLib version, serialize() returns either UTF-8 encoded bytes
        # or an already decoded text string; decode only in the former case. (Calling
        # str(text, encoding=...) on a text string raises TypeError on Python 3.)
        if isinstance(outp, bytes):
            u = outp.decode('utf-8')
        else:
            u = outp

        dstr = self.domtree.createTextNode(u)
        self.code.appendChild(dstr)

        # Settle the error message
        self.errors.interpret()

    def run(self):
        """
        Run the two steps of validation, and return the serialized version of the DOM Tree, ready to be displayed
        """
        self.parse()
        self.complete_DOM()
        if PY3:
            # html5lib's own serializer is used for the DOM tree on Python 3
            from html5lib.serializer import serialize
            return serialize(self.domtree, tree='dom')
        else:
            return str(self.domtree.toxml(encoding="utf-8"))