示例#1
0
def _s2f(s, base):
    """Build a formula from the N3 source text s.

    base is handed to the parser as its baseURI.

    Borrowed from llyn.BI_parsedAsN3; this arguably belongs in the
    myStore API (TODO).

    >>> _s2f("<#sky> <#color> <#blue>.", "http://example/socrates")
    {sky color blue}

    The expected output above relies on how formulas print themselves.
    """
    import notation3

    target = formula()
    target.setClosureMode("e")  # "e": implement sameAs by smushing

    parser = notation3.SinkParser(target.store,
                                  openFormula=target,
                                  baseURI=base,
                                  thisDoc="data:@@some-formula-string")
    parser.startDoc()
    parser.feed(s)
    parsed = parser.endDoc()
    parsed.close()

    # Collect one replacement per reason.representedBy statement: the
    # third element of spo() is replaced by the first (presumably
    # object -> subject), then apply them all at once.
    replacements = {}
    for statement in parsed.statementsMatching(pred=reason.representedBy):
        subj, _, obj = statement.spo()
        replacements[obj] = subj
    return parsed.substitution(replacements)
示例#2
0
def getParser(format, inputURI, workingContext, flags):
    """Build a parser that reads inputURI in the requested format.

    format:         "rdf", "n3", or any other language name, which is
                    delegated to LX.language.getParser.
    inputURI:       URI of the document the parser will read.
    workingContext: formula the "rdf" and "n3" parsers write into.
    flags:          mapping of per-format flags; for "rdf" the entry
                    flags["rdf"] is consulted, and an "l" in it selects
                    the rdflib2rdf parser.

    For "rdf" without the "l" flag, the CWM_RDF_PARSER environment
    variable picks the implementation ("sax2rdf" by default);
    RuntimeError is raised for any other value than "sax2rdf" or
    "rdflib2rdf".
    """
    # @@ add user, host, pid, date time? Privacy!
    because = BecauseOfCommandLine(sys.argv[0])

    if format == "n3":
        touch(_store)
        return notation3.SinkParser(_store,
                                    openFormula=workingContext,
                                    thisDoc=inputURI,
                                    why=because)

    if format != "rdf":
        # Everything else goes through the LX knowledge base.
        need(lxkb)
        touch(lxkb)
        return LX.language.getParser(language=format, sink=lxkb, flags=flags)

    touch(_store)
    if "l" in flags["rdf"]:
        moduleName = "rdflib2rdf"
    else:
        moduleName = os.environ.get("CWM_RDF_PARSER", "sax2rdf")
        if moduleName not in ("rdflib2rdf", "sax2rdf"):
            raise RuntimeError("Unknown RDF parser: " + moduleName)

    if moduleName == "rdflib2rdf":
        from rdflib2rdf import RDFXMLParser
    else:
        from sax2rdf import RDFXMLParser

    return RDFXMLParser(_store,
                        workingContext,
                        inputURI,
                        flags=flags[format],
                        why=because)
 def parse(self, stream, host=None):
     """Load `stream` into self.sink with a freshly built notation3.SinkParser.

     The parser is constructed from self.sink and the URI reported by
     stream.info().uri.  As a diagnostic, the sink and its prefixes are
     written to standard output first.  The value returned by
     p.loadStream(stream) is stored on self.sink.top.

     host is accepted but not used.

     NOTE(review): this snippet appears to be cut off after its last
     line, so whether anything is returned cannot be told from here.
     """
     #
     # make a new SinkParser each time, because it seems
     # to want to know thisDoc at creation time....
     #
     # @@ what is the difference between SinkParser.thisDoc and
     # SinkParser.baseURI?
     #
     # host is not used yet
     #
     uri = stream.info().uri
     print "Using sink", self.sink
     self.sink.dumpPrefixes(sys.stdout)
     p = notation3.SinkParser(self.sink, uri)
     result = p.loadStream(stream)
     self.sink.top = result  #  possible approach....   We need
示例#4
0
def load(store,
         uri=None,
         openFormula=None,
         asIfFrom=None,
         contentType=None,
         flags="",
         referer=None,
         why=None,
         topLevel=False):
    """Get and parse document.  Guesses format if necessary.

    uri:         document to read, joined onto uripath.base(); if None,
                 load from standard input.
    openFormula: if given, the document is parsed into this formula;
                 otherwise a new one is made with store.newFormula() and
                 the result is closed before returning.
    asIfFrom:    URI handed to the parser as thisDoc; defaults to the
                 absolute address that was actually read.
    contentType: caller-supplied hint, consulted only when the received
                 Content-Type header yields no guess.
    flags:       passed through to the parser.  For RDF/XML the value
                 'rdflib' selects the rdflib parser instead.
    referer:     passed to urlopenForRDF.
    why:         passed through to the parser / SPARQL converter.
    topLevel:    if true, newTopLevelFormula() is called on the formula.

    The format is chosen, in order, from the received Content-Type, the
    contentType hint, and finally by sniffing the start of the content;
    N3 is the fallback.

    Returns:  top-level formula of the parsed document.
    Raises:   DocumentAccessError when fetching or reading fails with
              IOError/OSError; parser errors propagate unchanged.

    This is an independent function, as it is fairly independent
    of the store. However, it is natural to call it as a method on the store.
    And a proliferation of APIs confuses.
    """
    #    if referer is None:
    #        raise RuntimeError("We are trying to force things to include a referer header")

    # Bound up front so the except clause below can always name the
    # document, even if the failure happens before the address is resolved.
    addr = uri
    try:
        baseURI = uripath.base()
        if uri is not None:
            addr = uripath.join(baseURI, uri)  # Make abs from relative
            if diag.chatty_flag > 40:
                progress("Taking input from " + addr)
            netStream = urlopenForRDF(addr, referer)
            if diag.chatty_flag > 60:
                progress("   Headers for %s: %s\n" %
                         (addr, netStream.headers.items()))
            receivedContentType = netStream.headers.get(
                HTTP_Content_Type, None)
        else:
            if diag.chatty_flag > 40:
                progress("Taking input from standard input")
            addr = uripath.join(baseURI, "STDIN")  # Make abs from relative
            netStream = sys.stdin
            receivedContentType = None

        #    @@How to get at all headers??
        #    @@ Get sensible net errors and produce dignostics

        guess = None
        if receivedContentType:
            if diag.chatty_flag > 9:
                progress("Recieved Content-type: " +
                         repr(receivedContentType) + " for " + addr)
            if 'xml' in receivedContentType or (
                    'rdf' in receivedContentType
                    and 'n3' not in receivedContentType):
                guess = "application/rdf+xml"
            elif 'n3' in receivedContentType:
                guess = "text/rdf+n3"
        if guess is None and contentType:
            if diag.chatty_flag > 9:
                progress("Given Content-type: " + repr(contentType) +
                         " for " + addr)
            if 'xml' in contentType or (
                    'rdf' in contentType and 'n3' not in contentType):
                guess = "application/rdf+xml"
            elif 'n3' in contentType:
                guess = "text/rdf+n3"
            # Substring tests, not bare str.find(): find() returns -1
            # (truthy) when the text is absent, which used to classify
            # nearly every unrecognised hint as SPARQL.
            elif 'sparql' in contentType or 'rq' in contentType:
                guess = "x-application/sparql"
        content = netStream.read()
        if guess is None:
            # No usable content type: sniff the start of the document.
            # can't be XML if it starts with these...
            if content.startswith("#") or content.startswith("@prefix"):
                guess = 'text/rdf+n3'
            elif content.startswith('PREFIX') or content.startswith('BASE'):
                guess = "x-application/sparql"
            elif 'xmlns="' in content or 'xmlns:' in content:
                guess = 'application/rdf+xml'
            else:
                guess = 'text/rdf+n3'
            if diag.chatty_flag > 9:
                progress("Guessed ContentType:" + guess)
    except (IOError, OSError):
        raise DocumentAccessError(addr, sys.exc_info())

    if asIfFrom is None:
        asIfFrom = addr
    if openFormula is not None:
        F = openFormula
    else:
        F = store.newFormula()
    if topLevel:
        newTopLevelFormula(F)
    import os
    if guess == "x-application/sparql":
        if diag.chatty_flag > 49:
            progress("Parsing as SPARQL")
        from sparql import sparql_parser
        import sparql2cwm
        convertor = sparql2cwm.FromSparql(store, F, why=why)
        import StringIO
        p = sparql_parser.N3Parser(StringIO.StringIO(content),
                                   sparql_parser.branches, convertor)
        F = p.parse(sparql_parser.start).close()
    elif guess == 'application/rdf+xml':
        if diag.chatty_flag > 49:
            progress("Parsing as RDF")
        #       import sax2rdf, xml.sax._exceptions
        #       p = sax2rdf.RDFXMLParser(store, F,  thisDoc=asIfFrom, flags=flags)
        if flags == 'rdflib' or int(os.environ.get("CWM_RDFLIB", 0)):
            parser = 'rdflib'
            flags = ''
        else:
            parser = os.environ.get("CWM_RDF_PARSER", "sax2rdf")
        import rdfxml
        p = rdfxml.rdfxmlparser(store,
                                F,
                                thisDoc=asIfFrom,
                                flags=flags,
                                parser=parser,
                                why=why)

        p.feed(content)
        F = p.close()
    else:
        assert guess == 'text/rdf+n3'
        if diag.chatty_flag > 49:
            progress("Parsing as N3")
        if os.environ.get("CWM_N3_PARSER", 0) == 'n3p':
            import n3p_tm
            import triple_maker
            tm = triple_maker.TripleMaker(formula=F, store=store)
            p = n3p_tm.n3p_tm(asIfFrom, tm)
        else:
            p = notation3.SinkParser(store,
                                     F,
                                     thisDoc=asIfFrom,
                                     flags=flags,
                                     why=why)

        try:
            p.startDoc()
            p.feed(content)
            p.endDoc()
        except:
            # Report what was being parsed, then let the error propagate.
            # Parenthesised so the content fallback applies when there is
            # no uri; '%' binds tighter than 'or'.
            progress("Failed to parse %s" % (uri or content))
            raise

    # Only close a formula this function created.  Tested with "is None"
    # to match the check above: a caller-supplied formula that happens
    # to be falsy (e.g. empty) must not be closed behind the caller's back.
    if openFormula is None:
        F = F.close()
    return F