def _load_all_graphs(self, progress, trip_prog):
    """Load every graph file listed in the graph index into the RDF graph.

    The index is ``<powdir>/graphs/index``. Each of its lines holds a file
    name and a context identifier separated by a single space; the named
    file is parsed as N-Triples into that context of the graph found under
    the ``'rdf.graph'`` configuration key. Nothing is loaded when the index
    does not exist.

    :param progress: reporter whose ``total`` is set to the number of index
        lines, whose ``update(1)`` is called once per file, and whose
        ``write`` receives status messages
    :param trip_prog: reporter whose ``update`` is called with the triple
        count of each file
    """
    import transaction
    from rdflib import plugin
    from rdflib.parser import Parser, create_input_source

    index_path = pth_join(self.powdir, 'graphs', 'index')
    total_triples = 0

    if exists(index_path):
        target_graph = self._conf()['rdf.graph']
        with open(index_path) as index:
            # First pass only counts the entries so the reporter gets a total;
            # the file is then rewound for the real pass.
            entry_count = sum(1 for _ in index)
            index.seek(0)
            progress.total = entry_count

            with transaction.manager:
                for entry in index:
                    fname, ctx = entry.strip().split(' ')
                    nt_parser = plugin.get('nt', Parser)()
                    graph_path = pth_join(self.powdir, 'graphs', fname)
                    with open(graph_path, 'rb') as nt_file:
                        with _BatchAddGraph(target_graph.get_context(ctx),
                                            batchsize=4000) as batch:
                            nt_parser.parse(create_input_source(nt_file), batch)

                    progress.update(1)
                    total_triples += batch.count
                    trip_prog.update(batch.count)
                progress.write('Finalizing writes to database...')

    progress.write('Loaded {:,} triples'.format(total_triples))
def load(self, url):
    """Return a graph holding the vocabulary identified by *url*.

    Resolution order:

    1. If *url* is a local file whose modification time is newer than the
       one recorded in ``self.mtime_map`` (or none is recorded), the file is
       parsed afresh, its triples replace the matching context in
       ``self.graph`` and the freshly parsed graph is returned.
    2. If ``self.graph`` already has triples in the context, that context is
       returned.
    3. If a cached copy exists at ``self.get_fs_path(url)``, it is parsed as
       Turtle.
    4. Otherwise the source is parsed directly and a Turtle copy is written
       to the cache path.
    """
    src = VOCAB_SOURCE_MAP.get(str(url), url)

    if not os.path.isfile(url):
        context_id = url
    else:
        context_id = create_input_source(url).getPublicId()
        recorded_mtime = self.mtime_map.get(url)
        current_mtime = os.stat(url).st_mtime
        if not recorded_mtime or recorded_mtime < current_mtime:
            logger.debug("Parse file: '%s'", url)
            self.mtime_map[url] = current_mtime
            # use CG as workaround for json-ld always loading as dataset
            parsed = ConjunctiveGraph()
            parsed.parse(src, format=guess_format(src))
            self.graph.remove_context(context_id)
            for subj, pred, obj in parsed:
                self.graph.add((subj, pred, obj, context_id))
            return parsed

    already_loaded = any(
        self.graph.triples((None, None, None), context=context_id))
    if already_loaded:
        logger.debug("Using context <%s>" % context_id)
        return self.graph.get_context(context_id)

    cache_path = self.get_fs_path(url)
    if os.path.exists(cache_path):
        logger.debug("Load local copy of <%s> from '%s'", context_id, cache_path)
        return self.graph.parse(cache_path, format='turtle', publicID=context_id)

    logger.debug("Fetching <%s> to '%s'", context_id, cache_path)
    fetch_format = 'rdfa' if url.endswith('html') else None
    fetched = self.graph.parse(src, format=fetch_format)
    with open(cache_path, 'w') as cache_file:
        fetched.serialize(cache_file, format='turtle')
    return fetched
def run_tc(tc):
    """Run one RDFa test case and check its result with a SPARQL query.

    The document behind ``tc.html_url`` is parsed with ``RDFaParser`` into a
    new graph (using the URL as public ID), then the query behind
    ``tc.sparql_url`` is checked against that graph with ``verify_ask``.

    :param tc: test case exposing ``html_url``, ``sparql_url`` and
        ``expected``
    :returns: tuple ``(ok, sparql, graph)`` - the verification result, the
        query text and the parsed graph
    """
    parser = RDFaParser()
    graph = Graph()
    source = create_input_source(cached_file(tc.html_url), publicID=tc.html_url)
    parser.parse(source, graph)

    # Read the query inside a context manager so the file handle is closed
    # right away instead of whenever the object is garbage collected.
    with open(cached_file(tc.sparql_url)) as query_file:
        sparql = query_file.read()

    ok = verify_ask(sparql, graph, tc.expected)
    return ok, sparql, graph
def source_to_json(source):
    """Return the JSON value decoded from *source*.

    *source* is turned into an input source with ``create_input_source``;
    its byte stream is handed to ``json.load`` and always closed afterwards.
    """
    # TODO: conneg for JSON (fix support in rdflib's URLInputSource!)
    byte_stream = create_input_source(source).getByteStream()
    try:
        return json.load(byte_stream)
    finally:
        byte_stream.close()
def source_to_json(source):
    """Decode and return the JSON document found at *source*.

    The byte stream obtained through ``create_input_source`` is closed
    whether or not decoding succeeds.
    """
    # TODO: conneg for JSON (fix support in rdflib's URLInputSource!)
    input_source = create_input_source(source)
    raw = input_source.getByteStream()
    try:
        decoded = json.load(raw)
    finally:
        raw.close()
    return decoded
def source_to_json(source):
    """Return the JSON value decoded from *source*.

    The input source is created with ``format="json-ld"``. Its byte stream
    is read in full, decoded as UTF-8 and parsed; the stream is closed in
    every case.
    """
    # TODO: conneg for JSON (fix support in rdflib's URLInputSource!)
    input_source = create_input_source(source, format="json-ld")
    raw = input_source.getByteStream()
    try:
        text = raw.read().decode("utf-8")
        return json.load(StringIO(text))
    finally:
        raw.close()
def source_to_json(source):
    """Return the JSON value decoded from *source*.

    When ``PY3`` is set the byte stream is read and decoded as UTF-8 before
    parsing; otherwise the byte stream is parsed directly. The stream is
    closed in every case.
    """
    # TODO: conneg for JSON (fix support in rdflib's URLInputSource!)
    byte_stream = create_input_source(source).getByteStream()
    try:
        if not PY3:
            return json.load(byte_stream)
        decoded_text = byte_stream.read().decode('utf-8')
        return json.load(StringIO(decoded_text))
    finally:
        byte_stream.close()
def source_to_json(source):
    """Return the JSON value decoded from *source*.

    The input source is created with ``format='json-ld'``. When ``PY3`` is
    set the bytes are decoded as UTF-8 first; otherwise the byte stream is
    parsed as it is. The stream is closed in every case.
    """
    # TODO: conneg for JSON (fix support in rdflib's URLInputSource!)
    byte_stream = create_input_source(source, format='json-ld').getByteStream()
    try:
        json_input = (StringIO(byte_stream.read().decode('utf-8'))
                      if PY3 else byte_stream)
        return json.load(json_input)
    finally:
        byte_stream.close()
def compile_stage5(g, docs, **kw):
    """
    Derive initialisation for genetic circuits present in the model

    Every object of an ``RBMC["include"]`` triple on the model is opened
    through ``create_input_source``, read in full and inserted at the front
    of ``docs``.

    :param g: graph containing exactly the model looked up via ``get_one``
    :param docs: list of documents; modified in place
    :param kw: accepted for interface compatibility, not used here
    :returns: the same ``docs`` list
    """
    model, _, _ = get_one(g, (None, RDF["type"], RBMO["Model"]))
    for _, _, inc in g.triples((model, RBMC["include"], None)):
        logging.info("stage5: including %s", inc)
        fp = create_input_source(inc.toPython()).getByteStream()
        try:
            doc = fp.read()
        finally:
            # Release the stream even when reading fails part-way.
            fp.close()
        docs.insert(0, doc)
    return docs
def iter(self, inputsource):
    """Yield one parsed item per meaningful line of an N-Quads input.

    *inputsource* is wrapped with ``create_input_source`` (format
    ``'nquads'``) and its byte stream is read as UTF-8. Empty lines and
    lines starting with ``#`` are skipped.

    :raises ParseError: if the byte stream has no ``read`` attribute, or if
        a line fails to parse (the message then includes the raw line)
    """
    inputsource = create_input_source(source=inputsource, format='nquads')
    byte_stream = inputsource.getByteStream()
    if not hasattr(byte_stream, 'read'):
        raise ParseError("Item to parse must be a file-like object.")

    self.file = getreader('utf-8')(byte_stream)
    self.buffer = ''

    while True:
        raw_line = self.readline()
        self.line = raw_line
        if self.line is None:
            break

        self.eat(r_wspace)
        # The line is empty or a comment
        if not self.line or self.line.startswith('#'):
            continue

        try:
            yield self.parseline()
        except ParseError as msg:
            raise ParseError("Invalid line (%s):\n%r" % (msg, raw_line))
def parse(self, source=None, publicID=None, format="xml", location=None,
          file=None, data=None, **args):
    """
    Parse source adding the resulting triples to its own context
    (sub graph of this graph).

    See `rdflib.graph.Graph.parse` for documentation on arguments.

    The context is identified by ``publicID`` when one is given, otherwise
    by the public ID of the input source. Any triples already in that
    context are removed before parsing.

    :Returns:

    The context graph into which the source was parsed.
    """
    source = create_input_source(source=source, publicID=publicID,
                                 location=location, file=file, data=data)

    if publicID:
        context_id = URIRef(publicID)
    else:
        context_id = source.getPublicId()

    context = Graph(store=self.store, identifier=context_id)
    context.remove((None, None, None))
    context.parse(source, publicID=publicID, format=format,
                  location=location, file=file, data=data, **args)
    return context
def source_to_json(source):
    """Return the JSON value carried by *source*.

    * A ``PythonInputSource`` yields its ``data`` attribute unchanged.
    * A ``StringInputSource`` is decoded from its character stream.
    * Anything else goes through ``create_input_source`` with
      ``format="json-ld"``; the resulting stream is closed after decoding.
    """
    if isinstance(source, PythonInputSource):
        return source.data

    if isinstance(source, StringInputSource):
        return json.load(source.getCharacterStream())

    # TODO: conneg for JSON (fix support in rdflib's URLInputSource!)
    source = create_input_source(source, format="json-ld")
    stream = source.getByteStream()
    try:
        # Use character stream as-is, or interpret byte stream as UTF-8
        text_stream = (stream if isinstance(stream, TextIOBase)
                       else TextIOWrapper(stream, encoding='utf-8'))
        return json.load(text_stream)
    finally:
        stream.close()
def parse(self, source=None, publicID=None, format="xml", location=None,
          file=None, data=None, **args):
    """
    Parse source adding the resulting triples to its own context
    (sub graph of this graph).

    See `rdflib.graph.Graph.parse` for documentation on arguments.

    ``format`` is also passed on when the input source is created. The
    context is cleared before the source is parsed into it.

    :Returns:

    The context graph into which the source was parsed.
    """
    source = create_input_source(
        source=source,
        publicID=publicID,
        location=location,
        file=file,
        data=data,
        format=format,
    )

    # An explicit publicID names the context; otherwise the source decides.
    identifier = URIRef(publicID) if publicID else source.getPublicId()
    context = Graph(store=self.store, identifier=identifier)
    context.remove((None, None, None))
    context.parse(
        source,
        publicID=publicID,
        format=format,
        location=location,
        file=file,
        data=data,
        **args
    )
    return context
def addNQ(self, quadio):
    """Parse *quadio* with an ``NQuadsParser`` into a ``QuadSink`` built on
    this object, then call ``finalise()`` on the sink's store."""
    quad_sink = QuadSink(self)
    parser = NQuadsParser(quad_sink)
    quad_source = create_input_source(quadio)
    parser.parse(quad_source, quad_sink)
    quad_sink.store.finalise()
def processRequest(self):
    """Interpret a request, relay to further processing and prepare response headers.

    Steps, in order:

    1. Set the ``Content-Type`` response header: ``text/html`` when
       ``self.html`` is set, otherwise a type chosen from
       ``self.target_format`` (``text/plain`` for unknown targets).
    2. When ``self.source_format`` is empty or ``"detect"``, take it from the
       content type of an input source built from ``self.content`` or
       ``self.page``; ``text/html`` is mapped to ``"rdfa"``.
    3. Convert ``self.content`` (or else ``self.page``) with
       ``translator.convert``; an empty RDFa result is retried as
       ``"microdata"``.
    4. On any failure, or an empty result, set status 500 and store an HTML
       error paragraph in ``self.response_string``.
    """
    global debug

    # NOTE: the flag is module-level, so it stays set after a dev URL was seen.
    if "rdf-translator-dev" in self.request.url:
        debug = True

    content_types = {
        "pretty-xml": "application/rdf+xml",
        "xml": "application/rdf+xml",
        "n3": "text/n3",
        "turtle": "text/turtle",
        "nquads": "text/x-nquads",
        "nt": "text/plain",
        "trix": "application/xml",
        "rdf-json": "application/json",
        "rdf-json-pretty": "application/json",
        "json-ld": "application/ld+json",
        "rdfa": "text/html",
        "microdata": "text/html",
    }

    # Loose comparison kept as-is so values merely equal to True still match.
    if self.html == True:
        self.do_pygmentize = True
        self.response.headers['Content-Type'] = "text/html"
    else:
        self.response.headers['Content-Type'] = content_types.get(
            self.target_format, "text/plain")

    if not self.source_format or self.source_format == "detect":
        if self.content:
            source = create_input_source(data=self.content, format=self.source_format)
            self.source_format = source.content_type
        elif self.page:
            source = create_input_source(location=self.page, format=self.source_format)
            self.source_format = source.content_type

        if self.source_format == "text/html":
            self.source_format = "rdfa"  # microdata is fallback

    try:
        self.response_string = "<p style='color: red; font-weight: bold; padding-top: 12px'>Translation failed</p>"

        # Inline content takes precedence over a page location.
        if self.content:
            payload, file_format = self.content, "string"
        elif self.page:
            payload, file_format = self.page, "file"
        else:
            payload, file_format = None, None

        if file_format is not None:
            self.response_string = translator.convert(
                payload,
                do_pygmentize=self.do_pygmentize,
                file_format=file_format,
                source_format=self.source_format,
                target_format=self.target_format)
            if self.response_string.strip() == "" and self.source_format == "rdfa":
                # fix microdata test
                self.response_string = translator.convert(
                    payload,
                    do_pygmentize=self.do_pygmentize,
                    file_format=file_format,
                    source_format="microdata",
                    target_format=self.target_format)

        if self.response_string.strip() == "":
            raise Exception("empty result returned")
    except Exception as e:  # "as" form is valid on Python 2.6+ and Python 3
        self.response.set_status(500)
        detail = traceback.format_exc() if debug else str(e)
        # Test the raw detail: once wrapped in <pre> it is never empty, which
        # made the "no message" fallback unreachable.
        if detail.strip() != "":
            error_message = "Error message:<br>%s" % (
                "<pre style=\"color: red\">" + detail + "</pre>")
        else:
            error_message = "No error message available"
        # NOTE(review): the interpolated values are not HTML-escaped.
        self.response_string = "<p style='color: red; font-weight: bold; padding-top: 12px'>Could not convert from %s to %s for provided resource...<br><br>%s</p>" % (
            self.source_format, self.target_format, error_message)
def processRequest(self):
    """Interpret a request, relay to further processing and prepare response headers.

    The ``Content-Type`` header is ``text/html`` when ``self.html`` is set and
    otherwise follows ``self.target_format`` (unknown targets get
    ``text/plain``). A missing or ``"detect"`` source format is replaced by
    the content type of an input source built from ``self.content`` or
    ``self.page``, with ``text/html`` treated as ``"rdfa"``. The conversion
    result is stored in ``self.response_string``; an empty RDFa result is
    retried as ``"microdata"``. Any exception, or a result that is still
    empty, yields status 500 and an HTML error paragraph instead.
    """
    global debug

    # NOTE: module-level flag; it is never reset once a dev URL was seen.
    if "rdf-translator-dev" in self.request.url:
        debug = True

    mime_by_target = {
        "pretty-xml": "application/rdf+xml",
        "xml": "application/rdf+xml",
        "n3": "text/n3",
        "turtle": "text/turtle",
        "nquads": "text/x-nquads",
        "nt": "text/plain",
        "trix": "application/xml",
        "rdf-json": "application/json",
        "rdf-json-pretty": "application/json",
        "json-ld": "application/ld+json",
        "rdfa": "text/html",
        "microdata": "text/html",
    }

    headers = self.response.headers
    # Equality test (not truthiness) retained to keep accepting the same values.
    if self.html == True:
        self.do_pygmentize = True
        headers['Content-Type'] = "text/html"
    else:
        headers['Content-Type'] = mime_by_target.get(self.target_format, "text/plain")

    if not self.source_format or self.source_format == "detect":
        if self.content:
            source = create_input_source(data=self.content, format=self.source_format)
            self.source_format = source.content_type
        elif self.page:
            source = create_input_source(location=self.page, format=self.source_format)
            self.source_format = source.content_type

        if self.source_format == "text/html":
            self.source_format = "rdfa"  # microdata is fallback

    try:
        self.response_string = "<p style='color: red; font-weight: bold; padding-top: 12px'>Translation failed</p>"

        if self.content:
            subject, kind = self.content, "string"
        elif self.page:
            subject, kind = self.page, "file"
        else:
            subject, kind = None, None

        def attempt(source_format):
            # One conversion run of the selected input with the given format.
            return translator.convert(subject,
                                      do_pygmentize=self.do_pygmentize,
                                      file_format=kind,
                                      source_format=source_format,
                                      target_format=self.target_format)

        if kind is not None:
            self.response_string = attempt(self.source_format)
            if self.response_string.strip() == "" and self.source_format == "rdfa":
                # fix microdata test
                self.response_string = attempt("microdata")

        if self.response_string.strip() == "":
            raise Exception("empty result returned")
    except Exception as e:  # portable spelling: Python 2.6+ and Python 3
        self.response.set_status(500)
        if debug:
            detail = traceback.format_exc()
        else:
            detail = str(e)

        # Decide on the fallback before wrapping; the wrapped markup is never
        # blank, so checking it afterwards could not reach the fallback.
        error_message = "No error message available"
        if detail.strip() != "":
            error_message = "Error message:<br>%s" % (
                "<pre style=\"color: red\">" + detail + "</pre>")

        # NOTE(review): the interpolated values are not HTML-escaped.
        self.response_string = "<p style='color: red; font-weight: bold; padding-top: 12px'>Could not convert from %s to %s for provided resource...<br><br>%s</p>" % (
            self.source_format, self.target_format, error_message)
def parse(self, source=None, publicID=None, format=None,
          location=None, file=None, data=None, **args):
    """
    Parse source adding the resulting triples to the Graph.

    The source is specified using one of source, location, file or
    data.

    :Parameters:

      - `source`: An InputSource, file-like object, or string. In the case
        of a string the string is the location of the source.
      - `location`: A string indicating the relative or absolute URL of the
        source. Graph's absolutize method is used if a relative location
        is specified.
      - `file`: A file-like object.
      - `data`: A string containing the data to be parsed.
      - `format`: Used if format can not be determined from source.
        Defaults to rdf/xml.
      - `publicID`: the logical URI to use as the document base. If None
        specified the document location is used (at least in the case where
        there is a document location).

    :Returns:

      - self, the graph instance.

    Examples:

    >>> my_data = '''
    ... <rdf:RDF
    ...   xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#'
    ...   xmlns:rdfs='http://www.w3.org/2000/01/rdf-schema#'
    ... >
    ...   <rdf:Description>
    ...     <rdfs:label>Example</rdfs:label>
    ...     <rdfs:comment>This is really just an example.</rdfs:comment>
    ...   </rdf:Description>
    ... </rdf:RDF>
    ... '''
    >>> import tempfile
    >>> file_name = tempfile.mktemp()
    >>> f = file(file_name, "w")
    >>> f.write(my_data)
    >>> f.close()

    >>> g = Graph()
    >>> result = g.parse(data=my_data, format="application/rdf+xml")
    >>> len(g)
    2

    >>> g = Graph()
    >>> result = g.parse(location=file_name, format="application/rdf+xml")
    >>> len(g)
    2

    >>> g = Graph()
    >>> result = g.parse(file=file(file_name, "r"), format="application/rdf+xml")
    >>> len(g)
    2

    """
    rdf_xml = "application/rdf+xml"

    if format == "xml":
        # warn... backward compat.
        format = rdf_xml

    source = create_input_source(source=source, publicID=publicID,
                                 location=location, file=file,
                                 data=data, format=format)

    if format is None:
        format = source.content_type
        if format is None:
            # Neither the caller nor the source named a format: fall back to
            # RDF/XML rather than raising.
            format = rdf_xml

    parser_class = plugin.get(format, Parser)
    parser = parser_class()
    parser.parse(source, self, **args)
    return self
def parse(self, source=None, publicID=None, format=None,
          location=None, file=None, data=None, **args):
    """
    Parse source adding the resulting triples to the Graph.

    The source is specified using one of source, location, file or
    data.

    :Parameters:

      - `source`: An InputSource, file-like object, or string. In the case
        of a string the string is the location of the source.
      - `location`: A string indicating the relative or absolute URL of the
        source. Graph's absolutize method is used if a relative location
        is specified.
      - `file`: A file-like object.
      - `data`: A string containing the data to be parsed.
      - `format`: Used if format can not be determined from source.
        Defaults to rdf/xml.
      - `publicID`: the logical URI to use as the document base. If None
        specified the document location is used (at least in the case where
        there is a document location).

    :Returns:

      - self, the graph instance.

    Examples:

    >>> my_data = '''
    ... <rdf:RDF
    ...   xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#'
    ...   xmlns:rdfs='http://www.w3.org/2000/01/rdf-schema#'
    ... >
    ...   <rdf:Description>
    ...     <rdfs:label>Example</rdfs:label>
    ...     <rdfs:comment>This is really just an example.</rdfs:comment>
    ...   </rdf:Description>
    ... </rdf:RDF>
    ... '''
    >>> import tempfile
    >>> file_name = tempfile.mktemp()
    >>> f = file(file_name, "w")
    >>> f.write(my_data)
    >>> f.close()

    >>> g = Graph()
    >>> result = g.parse(data=my_data, format="application/rdf+xml")
    >>> len(g)
    2

    >>> g = Graph()
    >>> result = g.parse(location=file_name, format="application/rdf+xml")
    >>> len(g)
    2

    >>> g = Graph()
    >>> result = g.parse(file=file(file_name, "r"), format="application/rdf+xml")
    >>> len(g)
    2

    """
    # warn... backward compat.
    if format == "xml":
        format = "application/rdf+xml"

    source = create_input_source(
        source=source,
        publicID=publicID,
        location=location,
        file=file,
        data=data,
        format=format,
    )

    if format is None:
        # Prefer what the source reports; RDF/XML is the last resort so that
        # an undetermined format does not raise.
        detected = source.content_type
        format = "application/rdf+xml" if detected is None else detected

    plugin.get(format, Parser)().parse(source, self, **args)
    return self
def read_canonical_from_file(ctx, dest, graph_fname):
    """Parse the N-Triples file *graph_fname* into context *ctx* of *dest*.

    Triples are added through a ``BatchAddGraph`` wrapped around *dest* and
    created with ``batchsize=10000``.
    """
    batcher = BatchAddGraph(dest, batchsize=10000)
    nt_parser = plugin.get('nt', Parser)()
    with open(graph_fname, 'rb') as nt_file:
        with batcher.get_context(ctx) as target:
            nt_parser.parse(create_input_source(nt_file), target)