示例#1
0
 def _load_all_graphs(self, progress, trip_prog):
     """
     Load every graph file listed in ``<powdir>/graphs/index`` into the
     configured ``rdf.graph`` store.

     Each index line is split on a single space into a file name and a
     context identifier. All files are parsed as N-Triples inside one
     ``transaction.manager`` block. If the index file does not exist only
     the final summary message is written.

     :param progress: reporter; its ``total`` is set to the number of index
         lines, ``update(1)`` is called per file and ``write`` is used for
         messages
     :param trip_prog: reporter; ``update`` receives the ``count`` of the
         batch graph used for each file
     """
     import transaction
     from rdflib import plugin
     from rdflib.parser import Parser, create_input_source

     index_path = pth_join(self.powdir, 'graphs', 'index')
     total_triples = 0
     if exists(index_path):
         target = self._conf()['rdf.graph']
         with open(index_path) as index_file:
             # First pass only counts entries so the progress total is known;
             # the file is then rewound for the real pass.
             entry_count = sum(1 for _ in index_file)
             index_file.seek(0)
             progress.total = entry_count
             with transaction.manager:
                 for entry in index_file:
                     graph_file, context_id = entry.strip().split(' ')
                     nt_parser = plugin.get('nt', Parser)()
                     graph_path = pth_join(self.powdir, 'graphs', graph_file)
                     with open(graph_path, 'rb') as src:
                         with _BatchAddGraph(target.get_context(context_id), batchsize=4000) as batch:
                             nt_parser.parse(create_input_source(src), batch)
                     progress.update(1)
                     added = batch.count
                     total_triples += added
                     trip_prog.update(added)
                 progress.write('Finalizing writes to database...')
     progress.write('Loaded {:,} triples'.format(total_triples))
示例#2
0
 def _load_all_graphs(self, progress, trip_prog):
     """
     Read the ``graphs/index`` file under ``self.powdir`` and add each listed
     N-Triples file to the matching context of the ``rdf.graph`` store.

     An index line has the form ``<file name> <context id>`` (split on one
     space). The whole load runs within a single ``transaction.manager``
     block. When no index file exists, nothing is loaded and the summary
     reports zero triples.

     :param progress: progress reporter (``total``, ``update``, ``write``)
     :param trip_prog: progress reporter updated with each file's batch
         ``count``
     """
     import transaction
     from rdflib import plugin
     from rdflib.parser import Parser, create_input_source

     index_path = pth_join(self.powdir, 'graphs', 'index')
     loaded = 0
     if exists(index_path):
         store_graph = self._conf()['rdf.graph']
         with open(index_path) as index_file:
             # Count the lines up front, then rewind to process them.
             line_total = sum(1 for _ in index_file)
             index_file.seek(0)
             progress.total = line_total
             with transaction.manager:
                 for index_line in index_file:
                     file_name, ctx_id = index_line.strip().split(' ')
                     nt_parser = plugin.get('nt', Parser)()
                     file_path = pth_join(self.powdir, 'graphs', file_name)
                     with open(file_path, 'rb') as graph_file:
                         with _BatchAddGraph(store_graph.get_context(ctx_id), batchsize=4000) as batch:
                             nt_parser.parse(create_input_source(graph_file), batch)
                     progress.update(1)
                     batch_count = batch.count
                     loaded += batch_count
                     trip_prog.update(batch_count)
                 progress.write('Finalizing writes to database...')
     progress.write('Loaded {:,} triples'.format(loaded))
示例#3
0
    def load(self, url):
        """
        Return a graph holding the vocabulary identified by ``url``.

        Resolution order:

        1. If ``url`` is a local file that is new or has a newer mtime than
           the one recorded in ``self.mtime_map``, it is (re)parsed, its
           triples replace the corresponding context in ``self.graph`` and
           the freshly parsed graph is returned.
        2. If ``self.graph`` already holds triples for the context, that
           context is returned.
        3. If a cached copy exists at ``self.get_fs_path(url)``, it is parsed
           as Turtle into the context.
        4. Otherwise the source is fetched and parsed, and a Turtle copy is
           written to the cache path.

        :param url: vocabulary URL or local file path; remapped through
            ``VOCAB_SOURCE_MAP`` to find the actual source to read
        :return: the graph (or graph context) containing the vocabulary
        """
        src = VOCAB_SOURCE_MAP.get(str(url), url)
        if os.path.isfile(url):
            context_id = create_input_source(url).getPublicId()
            last_vocab_mtime = self.mtime_map.get(url)
            vocab_mtime = os.stat(url).st_mtime
            if not last_vocab_mtime or last_vocab_mtime < vocab_mtime:
                logger.debug("Parse file: '%s'", url)
                self.mtime_map[url] = vocab_mtime
                # use CG as workaround for json-ld always loading as dataset
                graph = ConjunctiveGraph()
                graph.parse(src, format=guess_format(src))
                # Replace any previously loaded triples for this context.
                self.graph.remove_context(context_id)
                for s, p, o in graph:
                    self.graph.add((s, p, o, context_id))
                return graph
        else:
            context_id = url

        if any(self.graph.triples((None, None, None), context=context_id)):
            # Lazy %-args, consistent with the other logger calls here.
            logger.debug("Using context <%s>", context_id)
            return self.graph.get_context(context_id)

        cache_path = self.get_fs_path(url)
        if os.path.exists(cache_path):
            logger.debug("Load local copy of <%s> from '%s'", context_id, cache_path)
            return self.graph.parse(cache_path, format='turtle', publicID=context_id)
        else:
            logger.debug("Fetching <%s> to '%s'", context_id, cache_path)
            graph = self.graph.parse(src,
                    format='rdfa' if url.endswith('html') else None)
            # serialize() writes encoded bytes to a file-like destination,
            # so the cache file must be opened in binary mode.
            with open(cache_path, 'wb') as f:
                graph.serialize(f, format='turtle')
            return graph
def run_tc(tc):
    """
    Run a single RDFa test case.

    The HTML document at ``tc.html_url`` is parsed with ``RDFaParser`` into
    a fresh graph, then the query from ``tc.sparql_url`` is checked against
    that graph with ``verify_ask``. Both documents are read through
    ``cached_file``.

    :param tc: test case providing ``html_url``, ``sparql_url`` and
        ``expected``
    :return: tuple ``(ok, sparql, graph)``: the ``verify_ask`` result, the
        query text and the parsed graph
    """
    parser = RDFaParser()
    graph = Graph()
    source = create_input_source(cached_file(tc.html_url), publicID=tc.html_url)
    parser.parse(source, graph)
    # Read the query through a context manager so the handle is always closed.
    with open(cached_file(tc.sparql_url)) as query_file:
        sparql = query_file.read()
    ok = verify_ask(sparql, graph, tc.expected)
    return ok, sparql, graph
示例#5
0
def source_to_json(source):
    """
    Decode the content of ``source`` as JSON.

    ``source`` is turned into an input source with ``create_input_source``;
    its byte stream is read by ``json.load`` and closed afterwards, whether
    or not decoding succeeds.

    :param source: anything accepted by ``create_input_source``
    :return: the decoded JSON value
    """
    # TODO: conneg for JSON (fix support in rdflib's URLInputSource!)
    input_source = create_input_source(source)
    byte_stream = input_source.getByteStream()
    try:
        parsed = json.load(byte_stream)
    finally:
        byte_stream.close()
    return parsed
示例#6
0
def source_to_json(source):
    """
    Load JSON from ``source`` and return the decoded value.

    The byte stream obtained via ``create_input_source`` is handed to
    ``json.load`` and is always closed before returning or propagating an
    error.

    :param source: anything accepted by ``create_input_source``
    :return: the decoded JSON value
    """
    # TODO: conneg for JSON (fix support in rdflib's URLInputSource!)
    input_source = create_input_source(source)
    raw = input_source.getByteStream()
    try:
        result = json.load(raw)
    finally:
        raw.close()
    return result
示例#7
0
文件: util.py 项目: sa-bpelakh/rdflib
def source_to_json(source):
    """
    Load JSON from ``source`` and return the decoded value.

    The input source is created with ``format="json-ld"``. Its byte stream
    is read fully, decoded as UTF-8 and parsed; the stream is closed in all
    cases.

    :param source: anything accepted by ``create_input_source``
    :return: the decoded JSON value
    """
    # TODO: conneg for JSON (fix support in rdflib's URLInputSource!)
    input_source = create_input_source(source, format="json-ld")
    byte_stream = input_source.getByteStream()
    try:
        text = byte_stream.read().decode("utf-8")
        return json.load(StringIO(text))
    finally:
        byte_stream.close()
示例#8
0
def source_to_json(source):
    """
    Load JSON from ``source`` and return the decoded value.

    When ``PY3`` is false the byte stream is handed straight to
    ``json.load``; otherwise its content is decoded as UTF-8 first. The
    stream is closed in all cases.

    :param source: anything accepted by ``create_input_source``
    :return: the decoded JSON value
    """
    # TODO: conneg for JSON (fix support in rdflib's URLInputSource!)
    input_source = create_input_source(source)
    byte_stream = input_source.getByteStream()
    try:
        if not PY3:
            return json.load(byte_stream)
        decoded = byte_stream.read().decode('utf-8')
        return json.load(StringIO(decoded))
    finally:
        byte_stream.close()
示例#9
0
def source_to_json(source):
    """
    Load JSON from ``source`` and return the decoded value.

    The input source is created with ``format='json-ld'``. When ``PY3`` is
    false the byte stream is handed straight to ``json.load``; otherwise its
    content is decoded as UTF-8 first. The stream is closed in all cases.

    :param source: anything accepted by ``create_input_source``
    :return: the decoded JSON value
    """
    # TODO: conneg for JSON (fix support in rdflib's URLInputSource!)
    input_source = create_input_source(source, format='json-ld')
    byte_stream = input_source.getByteStream()
    try:
        if not PY3:
            return json.load(byte_stream)
        decoded = byte_stream.read().decode('utf-8')
        return json.load(StringIO(decoded))
    finally:
        byte_stream.close()
示例#10
0
def compile_stage5(g, docs, **kw):
    """
    Derive initialisation for genetic circuits present
    in the model

    For every ``RBMC["include"]`` object of the single ``RBMO["Model"]``
    resource in ``g``, the referenced document is read in full and inserted
    at the front of ``docs``.

    :param g: graph containing the model description
    :param docs: list of documents; modified in place
    :param kw: accepted but not used here
    :return: the same ``docs`` list
    """
    model, _, _ = get_one(g, (None, RDF["type"], RBMO["Model"]))
    for _, _, inc in g.triples((model, RBMC["include"], None)):
        logging.info("stage5: including %s", inc)
        fp = create_input_source(inc.toPython()).getByteStream()
        # Close the stream even when reading fails.
        try:
            doc = fp.read()
        finally:
            fp.close()
        docs.insert(0, doc)
    return docs
示例#11
0
文件: nqstream.py 项目: Darnok99/nel
 def iter(self, inputsource):
     """
     Iterate over ``inputsource`` as an N-Quads file.

     Generator: yields the result of ``self.parseline()`` for every line
     that is neither empty nor a ``#`` comment once leading whitespace has
     been consumed.

     :param inputsource: anything accepted by ``create_input_source``
     :raises ParseError: if the byte stream has no ``read`` attribute, or
         if a line cannot be parsed (the message includes the raw line)
     """
     input_source = create_input_source(source=inputsource, format='nquads')
     byte_stream = input_source.getByteStream()
     if not hasattr(byte_stream, 'read'):
         raise ParseError("Item to parse must be a file-like object.")
     self.file = getreader('utf-8')(byte_stream)
     self.buffer = ''
     while True:
         # Keep the untouched line for error reporting; self.line is
         # consumed progressively by the parsing helpers.
         raw_line = self.readline()
         self.line = raw_line
         if raw_line is None:
             break
         self.eat(r_wspace)
         if not self.line or self.line.startswith('#'):
             continue  # The line is empty or a comment
         try:
             yield self.parseline()
         except ParseError as msg:
             raise ParseError("Invalid line (%s):\n%r" % (msg, raw_line))
示例#12
0
文件: nqstream.py 项目: Darnok99/nel
 def iter(self, inputsource):
     """
     Lazily parse ``inputsource`` as N-Quads, one line at a time.

     Each non-empty, non-comment line is passed through
     ``self.parseline()`` and the result is yielded.

     :param inputsource: anything accepted by ``create_input_source``
     :raises ParseError: if the byte stream has no ``read`` attribute, or
         if a line cannot be parsed (the message includes the raw line)
     """
     nquads_source = create_input_source(source=inputsource, format='nquads')
     stream = nquads_source.getByteStream()
     if not hasattr(stream, 'read'):
         raise ParseError("Item to parse must be a file-like object.")
     self.file = getreader('utf-8')(stream)
     self.buffer = ''
     while True:
         # The original text is kept separately because self.line is
         # modified while the line is being parsed.
         original_line = self.readline()
         self.line = original_line
         if original_line is None:
             break
         self.eat(r_wspace)
         if not self.line or self.line.startswith('#'):
             continue  # The line is empty or a comment
         try:
             yield self.parseline()
         except ParseError as msg:
             raise ParseError("Invalid line (%s):\n%r" % (msg, original_line))
示例#13
0
    def parse(self, source=None, publicID=None, format="xml", location=None, file=None, data=None, **args):
        """
        Parse source adding the resulting triples to its own context
        (sub graph of this graph).

        The context is identified by ``publicID`` when given, otherwise by
        the public id of the input source. Any triples already in that
        context are removed before parsing.

        See `rdflib.graph.Graph.parse` for documentation on arguments.

        :Returns:

        The graph into which the source was parsed. In the case of n3
        it returns the root context.
        """
        input_source = create_input_source(
            source=source, publicID=publicID, location=location, file=file, data=data)

        # id = self.context_id(self.absolutize(source.getPublicId()))
        if publicID:
            context_id = URIRef(publicID)
        else:
            context_id = input_source.getPublicId()

        target = Graph(store=self.store, identifier=context_id)
        target.remove((None, None, None))
        target.parse(input_source, publicID=publicID, format=format,
                     location=location, file=file, data=data, **args)
        return target
示例#14
0
def source_to_json(source):
    """
    Return the JSON value carried by ``source``.

    * A ``PythonInputSource`` already holds decoded data, which is returned
      as-is.
    * A ``StringInputSource`` is parsed from its character stream.
    * Anything else goes through ``create_input_source`` with
      ``format="json-ld"``; the resulting stream is parsed and then closed.

    :param source: an input source or anything accepted by
        ``create_input_source``
    :return: the decoded JSON value
    """
    if isinstance(source, PythonInputSource):
        return source.data

    if isinstance(source, StringInputSource):
        return json.load(source.getCharacterStream())

    # TODO: conneg for JSON (fix support in rdflib's URLInputSource!)
    input_source = create_input_source(source, format="json-ld")
    raw_stream = input_source.getByteStream()
    try:
        # Use character stream as-is, or interpret byte stream as UTF-8
        text_stream = (
            raw_stream
            if isinstance(raw_stream, TextIOBase)
            else TextIOWrapper(raw_stream, encoding='utf-8')
        )
        return json.load(text_stream)
    finally:
        raw_stream.close()
示例#15
0
    def parse(self,
              source=None,
              publicID=None,
              format="xml",
              location=None,
              file=None,
              data=None,
              **args):
        """
        Parse source adding the resulting triples to its own context
        (sub graph of this graph).

        The context is identified by ``publicID`` when given, otherwise by
        the public id of the input source. Any triples already in that
        context are removed before parsing.

        See `rdflib.graph.Graph.parse` for documentation on arguments.

        :Returns:

        The graph into which the source was parsed. In the case of n3
        it returns the root context.
        """
        input_source = create_input_source(
            source=source, publicID=publicID, location=location,
            file=file, data=data, format=format)

        #id = self.context_id(self.absolutize(source.getPublicId()))
        if publicID:
            context_id = URIRef(publicID)
        else:
            context_id = input_source.getPublicId()

        target = Graph(store=self.store, identifier=context_id)
        target.remove((None, None, None))
        target.parse(input_source, publicID=publicID, format=format,
                     location=location, file=file, data=data, **args)
        return target
示例#16
0
 def addNQ(self, quadio):
     """
     Parse ``quadio`` with ``NQuadsParser`` into a ``QuadSink`` built on this
     object, then call ``finalise()`` on the sink's store.

     :param quadio: anything accepted by ``create_input_source``
     """
     quad_sink = QuadSink(self)
     NQuadsParser(quad_sink).parse(create_input_source(quadio), quad_sink)
     quad_sink.store.finalise()
示例#17
0
    def processRequest(self):
        """Interpret a request, relay to further processing and prepare response headers.

        Steps:

        1. Enable the module-level ``debug`` flag for requests whose URL
           contains ``rdf-translator-dev``.
        2. Set the response ``Content-Type`` from ``self.html`` and
           ``self.target_format`` (``text/plain`` for unknown formats).
        3. When ``self.source_format`` is unset or ``"detect"``, take it from
           the content type of the input source; ``text/html`` is treated as
           ``rdfa``.
        4. Convert ``self.content`` (preferred) or ``self.page`` with
           ``translator.convert`` and store the result in
           ``self.response_string``. An empty ``rdfa`` result is retried as
           ``microdata``.

        Any exception during conversion, or an empty final result, sets
        status 500 and stores an HTML error message instead.
        """
        global debug
        if "rdf-translator-dev" in self.request.url:
            debug = True

        if self.html == True:
            self.do_pygmentize = True
            content_type = "text/html"
        else:
            content_type = {
                "pretty-xml": "application/rdf+xml",
                "xml": "application/rdf+xml",
                "n3": "text/n3",
                "turtle": "text/turtle",
                "nquads": "text/x-nquads",
                "nt": "text/plain",
                "trix": "application/xml",
                "rdf-json": "application/json",
                "rdf-json-pretty": "application/json",
                "json-ld": "application/ld+json",
                "rdfa": "text/html",
                "microdata": "text/html",
            }.get(self.target_format, "text/plain")
        self.response.headers['Content-Type'] = content_type

        if not self.source_format or self.source_format == "detect":
            if self.content:
                source = create_input_source(data=self.content,
                                             format=self.source_format)
                self.source_format = source.content_type
            elif self.page:
                source = create_input_source(location=self.page,
                                             format=self.source_format)
                self.source_format = source.content_type

            if self.source_format == "text/html":
                self.source_format = "rdfa"  # microdata is fallback

        try:
            self.response_string = "<p style='color: red; font-weight: bold; padding-top: 12px'>Translation failed</p>"
            # Inline content takes precedence over a page location.
            if self.content:
                resource, file_format = self.content, "string"
            elif self.page:
                resource, file_format = self.page, "file"
            else:
                resource, file_format = None, None

            if resource:
                self.response_string = translator.convert(
                    resource,
                    do_pygmentize=self.do_pygmentize,
                    file_format=file_format,
                    source_format=self.source_format,
                    target_format=self.target_format)
                if self.response_string.strip(
                ) == "" and self.source_format == "rdfa":  # fix microdata test
                    self.response_string = translator.convert(
                        resource,
                        do_pygmentize=self.do_pygmentize,
                        file_format=file_format,
                        source_format="microdata",
                        target_format=self.target_format)
            if self.response_string.strip() == "":
                raise Exception("empty result returned")
        except Exception as e:
            self.response.set_status(500)
            detail = traceback.format_exc() if debug else str(e)
            # Test the raw text: once wrapped in <pre> it is never empty, so
            # the fallback message could not be reached before.
            if detail.strip() != "":
                error_message = "Error message:<br>%s" % (
                    "<pre style=\"color: red\">" + detail + "</pre>")
            else:
                error_message = "No error message available"
            # NOTE(review): detail and the format names are interpolated into
            # HTML without escaping; consider escaping them before output.
            self.response_string = "<p style='color: red; font-weight: bold; padding-top: 12px'>Could not convert from %s to %s for provided resource...<br><br>%s</p>" % (
                self.source_format, self.target_format, error_message)
示例#18
0
 def processRequest(self):
     """Interpret a request, relay to further processing and prepare response headers.

     Steps:

     1. Enable the module-level ``debug`` flag for requests whose URL
        contains ``rdf-translator-dev``.
     2. Set the response ``Content-Type`` from ``self.html`` and
        ``self.target_format`` (``text/plain`` for unknown formats).
     3. When ``self.source_format`` is unset or ``"detect"``, take it from
        the content type of the input source; ``text/html`` is treated as
        ``rdfa``.
     4. Convert ``self.content`` (preferred) or ``self.page`` with
        ``translator.convert`` and store the result in
        ``self.response_string``. An empty ``rdfa`` result is retried as
        ``microdata``.

     Any exception during conversion, or an empty final result, sets
     status 500 and stores an HTML error message instead.
     """
     global debug
     if "rdf-translator-dev" in self.request.url:
         debug = True

     if self.html == True:
         self.do_pygmentize = True
         content_type = "text/html"
     else:
         content_type = {
             "pretty-xml": "application/rdf+xml",
             "xml": "application/rdf+xml",
             "n3": "text/n3",
             "turtle": "text/turtle",
             "nquads": "text/x-nquads",
             "nt": "text/plain",
             "trix": "application/xml",
             "rdf-json": "application/json",
             "rdf-json-pretty": "application/json",
             "json-ld": "application/ld+json",
             "rdfa": "text/html",
             "microdata": "text/html",
         }.get(self.target_format, "text/plain")
     self.response.headers['Content-Type'] = content_type

     if not self.source_format or self.source_format == "detect":
         if self.content:
             source = create_input_source(data=self.content, format=self.source_format)
             self.source_format = source.content_type
         elif self.page:
             source = create_input_source(location=self.page, format=self.source_format)
             self.source_format = source.content_type

         if self.source_format == "text/html":
             self.source_format = "rdfa" # microdata is fallback

     try:
         self.response_string = "<p style='color: red; font-weight: bold; padding-top: 12px'>Translation failed</p>"
         # Inline content takes precedence over a page location.
         if self.content:
             resource, file_format = self.content, "string"
         elif self.page:
             resource, file_format = self.page, "file"
         else:
             resource, file_format = None, None

         if resource:
             self.response_string = translator.convert(resource, do_pygmentize=self.do_pygmentize, file_format=file_format, source_format=self.source_format, target_format=self.target_format)
             if self.response_string.strip() == "" and self.source_format == "rdfa": # fix microdata test
                 self.response_string = translator.convert(resource, do_pygmentize=self.do_pygmentize, file_format=file_format, source_format="microdata", target_format=self.target_format)
         if self.response_string.strip() == "":
             raise Exception("empty result returned")
     except Exception as e:
         self.response.set_status(500)
         detail = traceback.format_exc() if debug else str(e)
         # Test the raw text: once wrapped in <pre> it is never empty, so
         # the fallback message could not be reached before.
         if detail.strip() != "":
             error_message = "Error message:<br>%s" % ("<pre style=\"color: red\">" + detail + "</pre>")
         else:
             error_message = "No error message available"
         # NOTE(review): detail and the format names are interpolated into
         # HTML without escaping; consider escaping them before output.
         self.response_string = "<p style='color: red; font-weight: bold; padding-top: 12px'>Could not convert from %s to %s for provided resource...<br><br>%s</p>" % (self.source_format, self.target_format, error_message)
示例#19
0
    def parse(self, source=None, publicID=None, format=None,
              location=None, file=None, data=None, **args):
        """
        Parse source adding the resulting triples to the Graph.

        The source is specified using one of source, location, file or
        data.

        :Parameters: - `source`: An InputSource, file-like object, or string. In the case of a string the string is the location of the source.
                     - `location`: A string indicating the relative or absolute URL of the source. Graph's absolutize method is used if a relative location is specified.
                     - `file`: A file-like object.
                     - `data`: A string containing the data to be parsed.
                     - `format`: Name of the parser plugin to use; "xml" is treated as "application/rdf+xml". If None, the content type of the source is used, falling back to "application/rdf+xml".
                     - `publicID`: the logical URI to use as the document base. If None specified the document location is used (at least in the case where there is a document location).

        Additional keyword arguments are passed on to the parser.

        :Returns:

        self, the graph instance.

        Examples:

        >>> my_data = '''
        ... <rdf:RDF
        ...   xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#'
        ...   xmlns:rdfs='http://www.w3.org/2000/01/rdf-schema#'
        ... >
        ...   <rdf:Description>
        ...     <rdfs:label>Example</rdfs:label>
        ...     <rdfs:comment>This is really just an example.</rdfs:comment>
        ...   </rdf:Description>
        ... </rdf:RDF>
        ... '''
        >>> import tempfile
        >>> file_name = tempfile.mktemp()
        >>> f = file(file_name, "w")
        >>> f.write(my_data)
        >>> f.close()

        >>> g = Graph()
        >>> result = g.parse(data=my_data, format="application/rdf+xml")
        >>> len(g)
        2

        >>> g = Graph()
        >>> result = g.parse(location=file_name, format="application/rdf+xml")
        >>> len(g)
        2

        >>> g = Graph()
        >>> result = g.parse(file=file(file_name, "r"), format="application/rdf+xml")
        >>> len(g)
        2

        """

        # warn... backward compat.
        parser_name = "application/rdf+xml" if format == "xml" else format
        input_source = create_input_source(
            source=source, publicID=publicID, location=location,
            file=file, data=data, format=parser_name)
        if parser_name is None:
            parser_name = input_source.content_type
            if parser_name is None:
                #raise Exception("Could not determine format for %r. You can explicitly specify one with the format argument." % source)
                parser_name = "application/rdf+xml"
        plugin.get(parser_name, Parser)().parse(input_source, self, **args)
        return self
示例#20
0
    def parse(self,
              source=None,
              publicID=None,
              format=None,
              location=None,
              file=None,
              data=None,
              **args):
        """
        Parse source adding the resulting triples to the Graph.

        The source is specified using one of source, location, file or
        data.

        :Parameters: - `source`: An InputSource, file-like object, or string. In the case of a string the string is the location of the source.
                     - `location`: A string indicating the relative or absolute URL of the source. Graph's absolutize method is used if a relative location is specified.
                     - `file`: A file-like object.
                     - `data`: A string containing the data to be parsed.
                     - `format`: Name of the parser plugin to use; "xml" is treated as "application/rdf+xml". If None, the content type of the source is used, falling back to "application/rdf+xml".
                     - `publicID`: the logical URI to use as the document base. If None specified the document location is used (at least in the case where there is a document location).

        Additional keyword arguments are passed on to the parser.

        :Returns:

        self, the graph instance.

        Examples:

        >>> my_data = '''
        ... <rdf:RDF
        ...   xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#'
        ...   xmlns:rdfs='http://www.w3.org/2000/01/rdf-schema#'
        ... >
        ...   <rdf:Description>
        ...     <rdfs:label>Example</rdfs:label>
        ...     <rdfs:comment>This is really just an example.</rdfs:comment>
        ...   </rdf:Description>
        ... </rdf:RDF>
        ... '''
        >>> import tempfile
        >>> file_name = tempfile.mktemp()
        >>> f = file(file_name, "w")
        >>> f.write(my_data)
        >>> f.close()

        >>> g = Graph()
        >>> result = g.parse(data=my_data, format="application/rdf+xml")
        >>> len(g)
        2

        >>> g = Graph()
        >>> result = g.parse(location=file_name, format="application/rdf+xml")
        >>> len(g)
        2

        >>> g = Graph()
        >>> result = g.parse(file=file(file_name, "r"), format="application/rdf+xml")
        >>> len(g)
        2

        """

        # warn... backward compat.
        parser_name = "application/rdf+xml" if format == "xml" else format
        input_source = create_input_source(
            source=source, publicID=publicID, location=location,
            file=file, data=data, format=parser_name)
        if parser_name is None:
            parser_name = input_source.content_type
            if parser_name is None:
                #raise Exception("Could not determine format for %r. You can explicitly specify one with the format argument." % source)
                parser_name = "application/rdf+xml"
        plugin.get(parser_name, Parser)().parse(input_source, self, **args)
        return self
示例#21
0
def read_canonical_from_file(ctx, dest, graph_fname):
    """
    Parse the N-Triples file ``graph_fname`` into context ``ctx`` of ``dest``.

    Triples are added through a ``BatchAddGraph`` wrapper around ``dest``
    created with ``batchsize=10000``.

    :param ctx: context identifier passed to ``BatchAddGraph.get_context``
    :param dest: destination graph wrapped by ``BatchAddGraph``
    :param graph_fname: path of the file to read (opened in binary mode)
    """
    batch_graph = BatchAddGraph(dest, batchsize=10000)
    nt_parser = plugin.get('nt', Parser)()
    with open(graph_fname, 'rb') as src:
        with batch_graph.get_context(ctx) as target:
            nt_parser.parse(create_input_source(src), target)