Example #1
def get_people(name_or_nick):
    model = get_model()
    sources = ""
    FOAF = Namespace("http://xmlns.com/foaf/0.1/")
    sources = model.subjects(FOAF['name'], Literal(name_or_nick))
    if not sources:
        sources = model.subjects(FOAF['nick'], Literal(name_or_nick))
    people = dict()
    for source in sources:
        people[source] = {}
        props = get_props()
        for prop in props.keys():
            people[source][prop] = list()
            available = model.objects(source, URIRef(props[prop][1]))
            for i in available:
                people[source][prop].append(i)
    if len(people) < 1:
        # fall back to foaf:nick when the foaf:name lookup matched nothing
        sources = model.subjects(FOAF['nick'], Literal(name_or_nick))
        for source in sources:
            people[source] = {}
            props = get_props()
            for prop in props.keys():
                people[source][prop] = list()
                available = model.objects(source, URIRef(props[prop][1]))
                for i in available:
                    people[source][prop].append(i)
    return people
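A minimal usage sketch for the helper above; get_model() and get_props() are assumed to be module-level helpers (not shown here), and the result maps each matching FOAF subject to a dict from property name to a list of rdflib terms.

# Hypothetical call, relying on the module's own get_model()/get_props().
people = get_people("Alice")
for subject, props in people.items():
    # subject identifies the person; props maps each property name from
    # get_props() to the list of values found for that person.
    print subject, props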
Example #2
def rss2rdf(rsscontent):
    print "1"

    store.clear()
    query_guids = getAllQuery_guid(rsscontent)

    d = feedparser.parse(rsscontent)

    _idx = 0  # for accessing query_guid
    for item in d['entries']:
        print "*"
        title = _str(item.title)
        print "**"
        t = datetime.strptime(
            item.updated, "%a, %d %b %Y %H:%M:%S GMT")  # get a datetime object
        unixtime = int(mktime(t.timetuple()) + 1e-6 * t.microsecond)
        category = item.tags[
            0].term  # the category value in Google's RSS feed; it says whether the item is a query or a result
        description = item.summary_detail.value
        print "***"
        id = str(item.id)
        link = item.link

        sourceURI = URIRef("http://foolme.csail.mit.edu/lod/google-search#" +
                           id)
        # add common terms
        store.add_triple(sourceURI, ns.RDF['about'], ns.HISTORY['history'])
        store.add_triple(sourceURI, ns.HISTORY['link'], Literal(link))
        store.add_triple(sourceURI, ns.HISTORY['date'], Literal(unixtime))
        print "****"
        # add term specific to query
        if (category == 'web query'):
            store.add_triple(sourceURI, ns.RDF['about'], ns.HISTORY['query'])
            store.add_triple(sourceURI, ns.HISTORY['queryTerm'],
                             Literal(title))
            store.add_triple(sourceURI, ns.HISTORY['resultClicked'],
                             Literal(description[0]))
            print "*****"
        if (category == 'web result'):
            store.add_triple(sourceURI, ns.RDF['about'], ns.HISTORY['result'])
            store.add_triple(sourceURI, ns.HISTORY['resultTitle'],
                             Literal(title))
            smh_guid = query_guids[_idx]
            store.add_triple(sourceURI, ns.HISTORY['from'],
                             ns.GHISTORY[smh_guid])
            _idx += 1
            print "*****--"
    print "2"
    triples = store.writer.graph.serialize(format="n3")

    print triples
    return triples
Example #3
 def _pythonToLiteral(self, obj, obj_types):
     """
     obj - a python literal datatype
     obj_types - iterator yielding rdflib.URIRef.URIRef instances
     
     returns rdflib.Literal.Literal instance
     """
     for obj_type in obj_types:
         try:
             return Literal(SchemaToPython[obj_type][1](obj))
         except KeyError:
             pass
     return Literal(SchemaToPythonDefault[1](obj))
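The SchemaToPython table used above is not shown; this is a hedged sketch of the shape it appears to have (a datatype URI mapped to a pair of converter functions, the second of which is what _pythonToLiteral applies before wrapping the value in Literal). The actual table in the surrounding module may differ.

from rdflib import URIRef

XSD = "http://www.w3.org/2001/XMLSchema#"

# Hypothetical converter table: datatype URI -> (string -> python, python -> string).
SchemaToPython = {
    URIRef(XSD + "integer"): (int, str),
    URIRef(XSD + "string"): (str, str),
}
SchemaToPythonDefault = (str, str)  # fallback used when no datatype matches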
Example #4
def createHistoryTriples():
    store.clear()

    for history in histories:
        id = str(history.id)
        store.add_triple(ns.BHISTORY_DATA[id], ns.RDF['about'],
                         ns.BHISTORY['history'])
        store.add_triple(ns.BHISTORY_DATA[id], ns.BHISTORY['from'],
                         ns.BHISTORY_DATA[str(history.from_visit)])
        # if history.from_visit is 0, the visit did not come from any other link (not a follow-through visit)
        store.add_triple(ns.BHISTORY_DATA[id], ns.BHISTORY['place_id'],
                         ns.BPLACE_DATA[str(history.place_id)])
        store.add_triple(ns.BHISTORY_DATA[id], ns.BHISTORY['visit_date'],
                         Literal(history.visit_date))
        store.add_triple(ns.BHISTORY_DATA[id],
                         getVisitRelation(history.visit_type),
                         ns.BPLACE_DATA[str(history.place_id)])

    triples = store.writer.graph.serialize(format="n3")
    print "done with serialization"

    triples = "@prefix xsd: <http://www.w3.org/2001/XMLSchema#>." + triples
    triples = triples.replace("<http://www.w3.org/2001/XMLSchema#long>",
                              "xsd:long")
    triples = triples.replace("<http://www.w3.org/2001/XMLSchema#integer",
                              "xsd:integer")

    writeTriples(triples, "browser_history_data.n3")
Example #5
 def f(bindings):
     rt = sFunc(bindings)
     if isinstance(rt, Literal) and rt.datatype == target:
         #Literal already has target datatype
         return rt
     else:
         return Literal(rt, datatype=target)
 def property_element_char(self, data):
     current = self.current
     if current.object is None:
         try:
             current.object = Literal(data, current.language, current.datatype)
         except Error, e:
             self.error(e.msg)                
Example #7
    def add_warning(self, txt):
        """Add a warning. A comment triplet is added to the separate "warning" graph.
		@param txt: the warning text. It is preceded by the string "==== pyRdfa Warning ==== "
		"""
        if self.options.warning_graph != None:
            comment = Literal("=== pyRdfa warning === " + txt)
            self.options.warning_graph.add(
                (self.warning_URI_ref, rdfs_comment, comment))
    def node_element_start(self, name, qname, attrs):
        name, atts = self.convert(name, qname, attrs)
        current = self.current
        absolutize = self.absolutize
        next = self.next
        next.start = self.property_element_start
        next.end = self.property_element_end

        if name in NODE_ELEMENT_EXCEPTIONS:
            self.error("Invalid node element URI: %s" % name)

        if ID in atts:
            if ABOUT in atts or NODE_ID in atts:
                self.error("Can have at most one of rdf:ID, rdf:about, and rdf:nodeID")

            id = atts[ID]
            if not is_ncname(id):
                self.error("rdf:ID value is not a valid NCName: %s" % id)
            subject = absolutize("#%s" % id)
            if subject in self.ids:
                self.error("two elements cannot use the same ID: '%s'" % subject)
            self.ids[subject] = 1 # IDs can only appear once within a document
        elif NODE_ID in atts:
            if ID in atts or ABOUT in atts:
                self.error("Can have at most one of rdf:ID, rdf:about, and rdf:nodeID")
            nodeID = atts[NODE_ID]
            if not is_ncname(nodeID):
                self.error("rdf:nodeID value is not a valid NCName: %s" % nodeID)
            if nodeID in self.bnode:
                subject = self.bnode[nodeID]
            else:
                subject = BNode()
                self.bnode[nodeID] = subject
        elif ABOUT in atts:
            if ID in atts or NODE_ID in atts:
                self.error("Can have at most one of rdf:ID, rdf:about, and rdf:nodeID")
            subject = absolutize(atts[ABOUT])
        else:
            subject = BNode()

        if name!=DESCRIPTION: # S1
            self.store.add((subject, TYPE, absolutize(name)))

        if TYPE in atts: # S2
            self.store.add((subject, TYPE, absolutize(atts[TYPE])))

        language = current.language
        for att in atts:
            if not att.startswith(RDFNS):
                predicate = absolutize(att)
                try:
                    object = Literal(atts[att], language)
                except Error, e:
                    self.error(e.msg)                
            elif att==TYPE: #S2
                predicate = TYPE
                object = absolutize(atts[TYPE])
Example #9
 def object2node(object, objectType):
     if objectType == OBJECT_TYPE_RESOURCE:
         return URI2node(object)
     else:
         kwargs = {}
         if objectType.find(':') > -1:
             kwargs['datatype'] = objectType
         elif len(objectType) > 1:  #must be a language id
             kwargs['lang'] = objectType
         return Literal(object, **kwargs)
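A hedged usage sketch of the three branches above; URI2node and OBJECT_TYPE_RESOURCE are assumed to come from the surrounding module.

# Hypothetical calls; comments show the branch each one takes.
object2node("http://example.org/doc", OBJECT_TYPE_RESOURCE)     # resource -> URI2node(...)
object2node("42", "http://www.w3.org/2001/XMLSchema#integer")    # contains ':' -> typed Literal
object2node("chat", "fr")                                        # short tag -> language-tagged Literal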
Example #10
def createTerm(termString,
               termType,
               store,
               objLanguage=None,
               objDatatype=None):
    if termType == 'L':
        cache = store.literalCache.get((termString, objLanguage, objDatatype))
        if cache is not None:
            #store.cacheHits += 1
            return cache
        else:
            #store.cacheMisses += 1
            rt = Literal(termString, objLanguage, objDatatype)
            store.literalCache[(termString, objLanguage, objDatatype)] = rt
            return rt
    elif termType == 'F':
        cache = store.otherCache.get((termType, termString))
        if cache is not None:
            #store.cacheHits += 1
            return cache
        else:
            #store.cacheMisses += 1
            rt = QuotedGraph(store, URIRef(termString))
            store.otherCache[(termType, termString)] = rt
            return rt
    elif termType == 'B':
        cache = store.bnodeCache.get((termString))
        if cache is not None:
            #store.cacheHits += 1
            return cache
        else:
            #store.cacheMisses += 1
            rt = TERM_INSTANCIATION_DICT[termType](termString)
            store.bnodeCache[(termString)] = rt
            return rt
    elif termType == 'U':
        cache = store.uriCache.get((termString))
        if cache is not None:
            #store.cacheHits += 1
            return cache
        else:
            #store.cacheMisses += 1
            rt = URIRef(termString)
            store.uriCache[(termString)] = rt
            return rt
    else:
        cache = store.otherCache.get((termType, termString))
        if cache is not None:
            #store.cacheHits += 1
            return cache
        else:
            #store.cacheMisses += 1
            rt = TERM_INSTANCIATION_DICT[termType](termString)
            store.otherCache[(termType, termString)] = rt
            return rt
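The store passed to createTerm only needs the four cache dictionaries referenced above; a minimal hedged sketch of such an object and a cached lookup:

class CachingStore(object):
    """Hypothetical minimal store exposing the caches createTerm expects."""
    def __init__(self):
        self.literalCache = {}
        self.bnodeCache = {}
        self.uriCache = {}
        self.otherCache = {}

store = CachingStore()
lit = createTerm("hello", 'L', store, objLanguage="en")           # builds and caches a Literal
assert createTerm("hello", 'L', store, objLanguage="en") is lit   # second call hits the cache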
Example #11
 def statement2rdflib(statement):
     if statement.objectType == OBJECT_TYPE_RESOURCE:
         object = RDFLibModel.URI2node(statement.object)
     else:
         kwargs = {}
         if statement.objectType.find(':') > -1:
             kwargs['datatype'] = statement.objectType
         elif len(statement.objectType) > 1:  #must be a language id
             kwargs['lang'] = statement.objectType
         object = Literal(statement.object, **kwargs)
     return (RDFLibModel.URI2node(statement.subject),
             RDFLibModel.URI2node(statement.predicate), object)
Example #12
def createPlaceTriples():

    store.clear()
    #store.add_triple(ns.HISTORY.s1,ns.RDF.about,ns.BROW_HISTORY)
    #print store.writer.graph.serialize(format="n3")
    #    place = places[0]
    for place in places:
        id = str(place.id)
        store.add_triple(ns.BPLACE_DATA[id], ns.RDF['about'],
                         ns.BPLACE['place'])
        store.add_triple(ns.BPLACE_DATA[id], ns.BPLACE['url'],
                         Literal(place.url))
        store.add_triple(ns.BPLACE_DATA[id], ns.BPLACE['title'],
                         Literal(place.title))
        store.add_triple(ns.BPLACE_DATA[id], ns.BPLACE['visitCount'],
                         Literal(place.visit_count))
        store.add_triple(ns.BPLACE_DATA[id], ns.BPLACE['hidden'],
                         Literal(place.hidden))
        store.add_triple(ns.BPLACE_DATA[id], ns.BPLACE['typed'],
                         Literal(place.typed))
        store.add_triple(ns.BPLACE_DATA[id], ns.BPLACE['lastVisitDate'],
                         Literal(place.last_visit_date))

    triples = store.writer.graph.serialize(format="n3")
    print "done with serialization"

    triples = "@prefix xsd: <http://www.w3.org/2001/XMLSchema#> ." + triples
    triples = triples.replace("<http://www.w3.org/2001/XMLSchema#long>",
                              "xsd:long")
    triples = triples.replace("<http://www.w3.org/2001/XMLSchema#integer",
                              "xsd:integer")

    # I don't like the long namespace form, and it is not clear why rdflib only substitutes the shorter namespace prefix for predicates in the serialized triples
    writeTriples(triples, "browser_place_data.n3")
Example #13
def term(str, default=None):
    """See also from_n3"""
    if not str:
        return default
    elif str.startswith("<") and str.endswith(">"):
        return URIRef(str[1:-1])
    elif str.startswith('"') and str.endswith('"'):
        return Literal(str[1:-1])
    elif str.startswith("_"):
        return BNode(str)
    else:
        msg = "Unknown Term Syntax: '%s'" % str
        raise Exception(msg)
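Hedged examples of the term syntaxes the helper above accepts:

term("<http://example.org/x>")   # -> URIRef('http://example.org/x')
term('"hello"')                  # -> Literal('hello')
term("_:b1")                     # -> BNode('_:b1')  (the string is passed through as-is)
term("", default=Literal(""))    # empty input falls back to the supplied default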
Example #14
 def property_element_char(self, data):
     current = self.current
     if current.object is None:
         try:
             current.object = Literal(data, current.language, current.datatype)
         except Error as e:
             self.error(e.msg)
     else:
         if isinstance(current.object, Literal):
             try:
                 current.object += data
             except Error as e:
                 self.error(e.msg)
Example #15
def add_person_to_model(person, model, type, url):
    if person.has_key('sha'):
        p = BNode(value="_:p%s" % person['sha'][0])
    else:
        p = BNode()
    props = get_props()
    model.add((URIRef(url), type, p))
    for prop in props.keys():
        if person.has_key(prop):
            for i in person[prop]:
                if props[prop][0] == "uri":
                    node = URIRef(i)
                if props[prop][0] == "literal":
                    node = Literal(i)
                model.add((p, URIRef(props[prop][1]), node))
    model.add((p, URIRef("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"),
               URIRef("http://xmlns.com/foaf/0.1/Person")))
Example #16
    def search(self, terms, predicate=None):
        """ Returns a set of all the statements the term occurs in. """
        if predicate and not isinstance(predicate, URIRef):
            _logger.warning("predicate is not a URIRef")
            predicate = URIRef(predicate)
        results = set()
        terms = [Literal(term) for term in stopper(splitter(terms))]

        for term in terms:
            for t in self.triples((None, self.term, term)):
                for o in self.objects(t[0], self.termin):
                    for p in self.triples((predicate, t[0], o)):
                        if self.linked_data is None:
                            results.add((o, p[0], None))
                        else:
                            results.add(
                                (o, p[0], self.linked_data.value(o, p[0])))
        return results
Example #17
def _createResource(v):
    """Create an RDFLib Literal instance with the corresponding XML
    Schema datatype set. If the variable is already an RDFLib
    resource, it simply returns the resource; otherwise the
    corresponding Literal.  A SPARQLError Exception is raised if the
    type is not implemented.

    The Literal contains the string representation of the variable (as
    Python does it by default) with the corresponding XML Schema URI
    set.

    @param v: Python variable
    @return: either an RDFLib Literal (if 'v' is not an RDFLib Resource), or the same variable if it is already
    an RDFLib resource (ie, Literal, BNode, or URIRef)
    @raise SPARQLError: if the type of 'v' is not implemented
    """
    if isinstance(v, Literal) or isinstance(v, BNode) or isinstance(v, URIRef):
        # just do nothing
        return v
    else:
        return Literal(v)  # Literal now does the datatype bits
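A brief usage sketch; as the docstring notes, the Literal constructor is left to infer the XML Schema datatype from the Python value, so the exact datatype below depends on the rdflib version.

_createResource(42)                               # -> Literal with an xsd:integer datatype
_createResource(True)                             # -> Literal with an xsd:boolean datatype
_createResource(URIRef("http://example.org/x"))   # already an RDFLib resource, returned as-is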
Example #18
def _fromkey(key):
    if key.startswith("<") and key.endswith(">"):
        key = key[1:-1].decode("UTF-8")
        if key.startswith("_"):
            key = ''.join(splituri(key))
            return BNode(key)
        return URIRef(key)
    elif key.startswith("_"):
        return BNode(key)
    else:
        m = _literal.match(key)
        if m:
            d = m.groupdict()
            value = d["value"]
            value = unquote(value)
            value = value.decode("UTF-8")
            lang = d["lang"] or ''
            datatype = d["datatype"]
            return Literal(value, lang, datatype)
        else:
            msg = "Unknown Key Syntax: '%s'" % key
            raise Exception(msg)
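Hedged examples for the first two branches above; the literal branch depends on the _literal regular expression, which is not shown here, so its key syntax is not illustrated.

_fromkey("<http://example.org/x>")   # -> URIRef(u'http://example.org/x')
_fromkey("_:b1")                     # -> BNode('_:b1')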
Example #19
def from_n3(s, default=None, backend=None):
    """ Creates the Identifier corresponding to the given n3 string. WARNING: untested, may contain bugs. TODO: add test cases."""
    if not s:
        return default
    if s.startswith('<'):
        return URIRef(s[1:-1])
    elif s.startswith('"'):
        # TODO: would a regex be faster?
        value, rest = rsplit(s, '"', 1)
        value = value[1:]  # strip leading quote
        if rest.startswith("@"):
            if "^^" in rest:
                language, rest = rsplit(rest, '^^', 1)
                language = language[1:]  # strip leading at sign
            else:
                language = rest[1:]  # strip leading at sign
                rest = ''
        else:
            language = None
        if rest.startswith("^^"):
            datatype = rest[3:-1]
        else:
            datatype = None
        value = value.replace('\\"',
                              '"').replace('\\\\',
                                           '\\').decode("unicode-escape")
        return Literal(value, language, datatype)
    elif s.startswith('{'):
        identifier = from_n3(s[1:-1])
        return QuotedGraph(backend, identifier)
    elif s.startswith('['):
        identifier = from_n3(s[1:-1])
        return Graph(backend, identifier)
    else:
        if s.startswith("_:"):
            return BNode(s[2:])
        else:
            return BNode(s)
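Minimal hedged examples of the N3 term forms handled above:

from_n3('<http://example.org/x>')                             # -> URIRef
from_n3('"chat"@fr')                                          # -> Literal('chat', lang='fr')
from_n3('"1"^^<http://www.w3.org/2001/XMLSchema#integer>')    # -> typed Literal
from_n3('_:b1')                                               # -> BNode('b1')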
Example #20
import base64, sets
import rdflib  # http://rdflib.net/
from rdflib.Identifier import Identifier as ID
from rdflib.URIRef import URIRef as URI
from rdflib.BNode import BNode
from rdflib.Literal import Literal
from rdflib import RDF, RDFS

RDF_SEQi = "http://www.w3.org/1999/02/22-rdf-syntax-ns#_%s"
MAX_CARD = URI("http://www.w3.org/2002/07/owl#maxCardinality")
CARD = URI("http://www.w3.org/2002/07/owl#cardinality")
RESTRICTION = URI("http://www.w3.org/2002/07/owl#Restriction")
FUNC_PROP = URI("http://www.w3.org/2002/07/owl#FunctionalProperty")
ON_PROP = URI("http://www.w3.org/2002/07/owl#onProperty")
ONE = Literal("1")


class ThingFactory:
    """
    Fed a store, return a factory that can be used to instantiate
    Things into that world.
    """
    def __init__(self, store, schema_store=None, alias_map=None):
        """
        store - rdflib.Graph.Graph instance
        schema_store - rdflib.Graph.Graph instance; defaults to store
        """
        self.store = store
        self.schema_store = schema_store or self.store
        self.alias_map = alias_map or {}
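A hedged instantiation sketch, assuming the rdflib 2.x module layout used by the imports above:

from rdflib.Graph import Graph  # rdflib 2.x layout, matching the module-level imports

store = Graph()    # instance data
schema = Graph()   # ontology/schema data
Thing = ThingFactory(store, schema_store=schema)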
Example #21
class TextIndex(ConjunctiveGraph):
    """
    An rdflib graph event handler that indexes text literals that are
    added to another graph.

    This class lets you 'search' the text literals in an RDF graph.
    Typically in RDF to search for a substring in an RDF graph you
    would have to 'brute force' search every literal string looking
    for your substring.

    Instead, this index stores the words in literals into another
    graph whose structure makes searching for terms much less
    expensive.  It does this by chopping up the literals into words,
    removing very common words (currently only in English) and then
    adding each of those words into an RDF graph that describes the
    statements in the original graph that the word came from.

    First, let's create a graph that will transmit events and a text
    index that will receive those events, and then subscribe the text
    index to the event graph:

      >>> e = ConjunctiveGraph()
      >>> t = TextIndex()
      >>> t.subscribe_to(e)

    When triples are added to the event graph (e) events will be fired
    that trigger event handlers in subscribers.  In this case our only
    subscriber is a text index and its action is to index triples that
    contain literal RDF objects.  Here are 3 such triples:

      >>> e.add((URIRef('a'), URIRef('title'), Literal('one two three')))
      >>> e.add((URIRef('b'), URIRef('title'), Literal('two three four')))
      >>> e.add((URIRef('c'), URIRef('title'), Literal('three four five')))

    The three literal objects that were added contain five unique
    terms between them.  These terms can be queried directly from the
    text index:
    
      >>> t.term_strings() ==  set(['four', 'five', 'three', 'two', 'one'])
      True

    Now we can search for statements that contain certain terms.  Let's
    search for 'one', which occurs in only one of the literals
    provided, the one attached to 'a'.  This can be queried for:

      >>> t.search('one')
      set([(rdflib.URIRef('a'), rdflib.URIRef('title'), None)])

    'one' and 'five' only occur in one statement each, 'two' and
    'four' occur in two, and 'three' occurs in three statements:

      >>> len(list(t.search('one')))
      1
      >>> len(list(t.search('two')))
      2
      >>> len(list(t.search('three')))
      3
      >>> len(list(t.search('four')))
      2
      >>> len(list(t.search('five')))
      1

    Let's add some more statements with different predicates.

      >>> e.add((URIRef('a'), URIRef('creator'), Literal('michel')))
      >>> e.add((URIRef('b'), URIRef('creator'), Literal('Atilla the one Hun')))
      >>> e.add((URIRef('c'), URIRef('creator'), Literal('michel')))
      >>> e.add((URIRef('d'), URIRef('creator'), Literal('Hun Mung two')))

    Now 'one' occurs in two statements:

      >>> assert len(list(t.search('one'))) == 2

    And 'two' occurs in three statements, here they are:

      >>> t.search('two')
      set([(rdflib.URIRef('d'), rdflib.URIRef('creator'), None), (rdflib.URIRef('a'), rdflib.URIRef('title'), None), (rdflib.URIRef('b'), rdflib.URIRef('title'), None)])

    The predicates that are searched can be restricted by providing an
    argument to 'search()':

      >>> t.search('two', URIRef('creator'))
      set([(rdflib.URIRef('d'), rdflib.URIRef('creator'), None)])

      >>> t.search('two', URIRef(u'title'))
      set([(rdflib.URIRef('a'), rdflib.URIRef('title'), None), (rdflib.URIRef('b'), rdflib.URIRef('title'), None)])

    You can search for more than one term by simply including it in
    the query:
    
      >>> t.search('two three', URIRef(u'title'))
      set([(rdflib.URIRef('c'), rdflib.URIRef('title'), None), (rdflib.URIRef('a'), rdflib.URIRef('title'), None), (rdflib.URIRef('b'), rdflib.URIRef('title'), None)])

    The above query returns all the statements that contain 'two' OR
    'three'.  For the documents that contain 'two' AND 'three', do an
    intersection of two queries:

      >>> t.search('two', URIRef(u'title')).intersection(t.search(u'three', URIRef(u'title')))
      set([(rdflib.URIRef('a'), rdflib.URIRef('title'), None), (rdflib.URIRef('b'), rdflib.URIRef('title'), None)])

    Intersecting two queries like this is probably not the most
    efficient way to do it, but for reasonable data sets this isn't a
    problem.  Larger data sets will want to query the graph with
    SPARQL or something else more efficient.

    In all the above queries, the object of each statement was always
    'None'.  This is because the index graph does not store the object
    data; that would make it very large, and the data is in any case
    available in the original data graph.  For convenience, a method
    is provided to 'link' an index graph to a data graph.  This allows
    the index to also provide object data in query results.

      >>> t.link_to(e)
      >>> set([str(i[2]) for i in t.search('two', URIRef(u'title')).intersection(t.search(u'three', URIRef(u'title')))]) ==  set(['two three four', 'one two three'])
      True

    You can remove the link by assigning None:

      >>> t.link_to(None)

    Unindexing means removing statements from the index graph that
    correspond to a statement in the data graph.  Note that while it is
    possible to remove the index information about the occurrences of
    terms in statements, it is not possible to remove the terms
    themselves; terms are 'absolute' and are never removed from the
    index graph.  This is not a problem since languages have a finite
    number of terms:

      >>> e.remove((URIRef('a'), URIRef('creator'), Literal('michel')))
      >>> e.remove((URIRef('b'), URIRef('creator'), Literal('Atilla the one Hun')))
      >>> e.remove((URIRef('c'), URIRef('creator'), Literal('michel')))
      >>> e.remove((URIRef('d'), URIRef('creator'), Literal('Hun Mung two')))

    Now 'one' only occurs in one statement:

      >>> assert len(list(t.search('one'))) == 1

    And 'two' only occurs in two statements, here they are:

      >>> t.search('two')
      set([(rdflib.URIRef('a'), rdflib.URIRef('title'), None), (rdflib.URIRef('b'), rdflib.URIRef('title'), None)])

    The predicates that are searched can be restricted by providing an
    argument to 'search()':

      >>> t.search('two', URIRef(u'creator'))
      set([])

      >>> t.search('two', URIRef(u'title'))
      set([(rdflib.URIRef('a'), rdflib.URIRef('title'), None), (rdflib.URIRef('b'), rdflib.URIRef('title'), None)])

    """

    linked_data = None

    text_index = Namespace('http://rdflib.net/text_index#')
    term = Namespace('http://rdflib.net/text_index#')["term"]
    termin = Namespace('http://rdflib.net/text_index#')["termin"]

    def __init__(self, store='default'):
        super(TextIndex, self).__init__(store)

    def add_handler(self, event):
        if type(event.triple[2]) is Literal:
            self.index(event.triple)

    def remove_handler(self, event):
        if type(event.triple[2]) is Literal:
            self.unindex(event.triple)

    def index(self, (s, p, o)):
        # this code is tricky so it's annotated.  unindex is the reverse of this method.

        if type(o) is Literal:  # first, only index statements that have a literal object
            for word in stopper(splitter(o)):  # split the literal and remove any stopwords
                word = Literal(word)  # create a new literal for each word in the object

                # if that word already exists as a term in the index
                # loop over each context the term occurs in
                if self.value(predicate=self.term, object=word, any=True):
                    for t in set(self.triples((None, self.term, word))):
                        t = t[0]
                        # if the graph does not contain an occurrence of the term in the statement's subject
                        # then add it
                        if not (t, self.termin, s) in self:
                            self.add((t, self.termin, s))

                        # ditto for the predicate
                        if not (p, t, s) in self:
                            self.add((p, t, s))

                else:  # if the term does not exist in the graph, add it, and the references to the statement.
                    # t gets used as a predicate, create identifier accordingly (AKA can't be a BNode)
                    h = md5(word.encode('utf-8'))
                    h.update(s.encode('utf-8'))
                    h.update(p.encode('utf-8'))
                    t = self.text_index["term_%s" % h.hexdigest()]
                    self.add((t, self.term, word))
                    self.add((t, self.termin, s))
                    self.add((p, t, s))
    def property_element_start(self, name, qname, attrs):
        name, atts = self.convert(name, qname, attrs)
        current = self.current
        absolutize = self.absolutize        
        next = self.next
        object = None
        current.list = None

        if not name.startswith(RDFNS):
            current.predicate = absolutize(name)            
        elif name==LI:
            current.predicate = current.next_li()
        elif name in PROPERTY_ELEMENT_EXCEPTIONS:
            self.error("Invalid property element URI: %s" % name)
        else:
            current.predicate = absolutize(name)            

        id = atts.get(ID, None)
        if id is not None:
            if not is_ncname(id):
                self.error("rdf:ID value is not a value NCName: %s" % id)
            current.id = absolutize("#%s" % id)
        else:
            current.id = None

        resource = atts.get(RESOURCE, None)
        nodeID = atts.get(NODE_ID, None)
        parse_type = atts.get(PARSE_TYPE, None)
        if resource is not None and nodeID is not None:
            self.error("Property element cannot have both rdf:nodeID and rdf:resource")
        if resource is not None:
            object = absolutize(resource)
            next.start = self.node_element_start
            next.end = self.node_element_end
        elif nodeID is not None:
            if not is_ncname(nodeID):
                self.error("rdf:nodeID value is not a valid NCName: %s" % nodeID)
            if nodeID in self.bnode:
                object = self.bnode[nodeID]
            else:
                subject = BNode()
                self.bnode[nodeID] = subject
                object = subject
            next.start = self.node_element_start
            next.end = self.node_element_end                
        else:
            if parse_type is not None:
                for att in atts:
                    if att!=PARSE_TYPE and att!=ID:
                        self.error("Property attr '%s' now allowed here" % att)
                if parse_type=="Resource": 
                    current.subject = object = BNode()
                    current.char = self.property_element_char                    
                    next.start = self.property_element_start
                    next.end = self.property_element_end
                elif parse_type=="Collection":
                    current.char = None                    
                    next.start = self.node_element_start
                    next.end = self.list_node_element_end
                else: #if parse_type=="Literal":
                     # All other values are treated as Literal
                     # See: http://www.w3.org/TR/rdf-syntax-grammar/#parseTypeOtherPropertyElt
                    #object = Literal("", current.language, XMLLiteral)
                    object = Literal("", "", XMLLiteral)
                    current.char = self.literal_element_char
                    current.declared = {}
                    next.start = self.literal_element_start
                    next.char = self.literal_element_char
                    next.end = self.literal_element_end
                current.object = object
                return
            else:
                object = None
                current.char = self.property_element_char
                next.start = self.node_element_start
                next.end = self.node_element_end                

        datatype = current.datatype = atts.get(DATATYPE, None)
        language = current.language        
        if datatype is not None:
            # TODO: check that there are no atts other than datatype and id
            pass
        else:
            for att in atts:
                if not att.startswith(RDFNS):
                    predicate = absolutize(att)                        
                elif att in PROPERTY_ELEMENT_ATTRIBUTES:
                    continue
                elif att in PROPERTY_ATTRIBUTE_EXCEPTIONS:
                    self.error("""Invalid property attribute URI: %s""" % att)
                else:
                    predicate = absolutize(att)                    

                if att==TYPE:
                    o = URIRef(atts[att])
                else:
                    o = Literal(atts[att], language, datatype)

                if object is None:
                    object = BNode()
                self.store.add((object, predicate, o))
        if object is None:
            object = Literal("", language, datatype)                
        current.object = object
Example #23
def len_graph(request):
    """ This Works..."""

    #store = Graph()
    #store.bind("contact", "http://www.example.com/contact#")
    #store.bind("person", "http://www.example.com/person#")
    #store.bind("xs", "http://www.w3.org/2001/XMLSchema#")
    #store.bind("rdfs", "http://www.w3.org/2000/01/rdf-schema#")
    #store.bind("rdf", "http://www.w3.org/1999/02/22-rdf-syntax-ns#")
    #store.bind("owl", "http://www.w3.org/2002/07/owl#")

    # Declare namespaces to use.
    ns_sn = Namespace("http://www.snee.com/ns/misc#")
    ns_sd = Namespace("http://www.snee.com/docs/")
    ns_dc = Namespace("http://purl.org/dc/elements/1.1/")
    ns_pr = Namespace("http://prismstandard.org/1.0#")

    myfile = '/var/rdf/municipality.rdf'

    # Create a graph object to hold the triples.
    graph = Graph()

    # Add triples to store.
    graph.add(
        (ns_sd["d1001"], ns_dc["title"], Literal("Sample Acrobat document")))
    graph.add((ns_sd["d1001"], ns_dc["format"], Literal("PDF")))
    graph.add((ns_sd["d1001"], ns_dc["creator"], Literal("Billy Shears")))
    graph.add(
        (ns_sd["d1001"], ns_pr["publicationTime"], Literal("2002-12-19")))

    graph.add((ns_sd["d1002"], ns_dc["title"], Literal("Sample RTF document")))
    graph.add((ns_sd["d1002"], ns_dc["format"], Literal("RTF")))
    graph.add((ns_sd["d1002"], ns_dc["creator"], Literal("Nanker Phelge")))
    graph.add(
        (ns_sd["d1002"], ns_pr["publicationTime"], Literal("2002-12-15")))

    graph.add(
        (ns_sd["d1003"], ns_dc["title"], Literal("Sample LaTeX document")))
    graph.add((ns_sd["d1003"], ns_dc["format"], Literal("LaTeX")))
    graph.add((ns_sd["d1003"], ns_dc["creator"], Literal("Richard Mutt")))
    graph.add(
        (ns_sd["d1003"], ns_pr["publicationTime"], Literal("2002-12-16")))
    graph.add((ns_sd["d1003"], ns_sn["quality"], Literal("pretty good")))

    #store.parse (myfile)
    rdf_subjects = graph.subjects()
    rdf_predicates = graph.predicates()
    rdf_objects = graph.objects()

    select_predicate_by_subject = graph.predicates(subject=ns_sd["d1001"])
    select_object_by_predicate = graph.objects(predicate=ns_dc["title"])

    g = Graph()
    g.parse(myfile, format="xml")
    exec "html = 'the lenght of the graph is: %s'" % len(g)

    context = {'html': html, 'g': select_predicate_by_subject}

    return render_to_response('len_graph.html', context)
                try:
                    object = Literal(atts[att], language)
                except Error, e:
                    self.error(e.msg)                
            elif att==TYPE: #S2
                predicate = TYPE
                object = absolutize(atts[TYPE])
            elif att in NODE_ELEMENT_ATTRIBUTES:
                continue
            elif att in PROPERTY_ATTRIBUTE_EXCEPTIONS: #S3
                self.error("Invalid property attribute URI: %s" % att)
                continue # for when error does not throw an exception
            else:
                predicate = absolutize(att)
                try:
                    object = Literal(atts[att], language)
                except Error, e:
                    self.error(e.msg)                    
            self.store.add((subject, predicate, object))

        current.subject = subject

        
    def node_element_end(self, name, qname):
        self.parent.object = self.current.subject
        
    def property_element_start(self, name, qname, attrs):
        name, atts = self.convert(name, qname, attrs)
        current = self.current
        absolutize = self.absolutize        
        next = self.next
Example #25
   xmlns:dc   ="http://purl.org/dc/elements/1.1/"
   xmlns:foaf ="http://xmlns.com/foaf/0.1/"
   xmlns:ns   ="http://example.org/ns#"
   xmlns:dt   ="http://example.org/datatype#"
>
        <rdf:Description>
                <ns:p rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">42</ns:p>
                <ns:p rdf:datatype="http://example.org/datatype#specialDatatype">abc</ns:p>
                <ns:p>2005-02-27</ns:p>
                <ns:p xml:lang="en">cat</ns:p>
        </rdf:Description>
</rdf:RDF>
"""
from testSPARQL import ns_rdf
from testSPARQL import ns_rdfs
from testSPARQL import ns_dc
from testSPARQL import ns_foaf
from testSPARQL import ns_ns

from rdflib.Literal import Literal
import datetime
from rdflib.sparql.graphPattern import GraphPattern

select = ["?v"]
pattern = GraphPattern([("?v", "?p", Literal("cat", lang="en"))])
optional = []
tripleStore = None
expected = '''
?v : (some Bnode id)
'''
Example #26
        def prepare_row(row):
            '''
          Convert a single row from the results of the big SPARQL solution
          query to a map from query variables to lexical values.

          :Parameters:
          - `row`: The return value of `fetchone()` on a MySQLdb cursor
            object after executing the SPARQL solving SQL query.

          Returns a dictionary from SPARQL variable names to one set of
          correct values for the original list of SPARQL triple patterns.
          '''

            # First, turn the list into a map from column names to values.
            row_map = dict(
                zip([description[0] for description in cursor.description],
                    row))

            # As the values are all integers, we must execute another SQL
            # query to map the integers to their lexical values.  This query
            # is straightforward to build, so we can do it here instead of
            # using helper objects.
            prefix = self._internedId
            columns = []
            from_fragments = []
            where_fragments = []
            substitutions = []
            for varname, column_name, is_object, cluster in variable_columns:
                component_name = "component_" + str(len(from_fragments))
                columns.append(component_name + ".lexical as " + column_name)

                where_fragments.append(component_name + '.id = %s')
                substitutions.append(row_map[column_name])

                term = row_map[column_name + '_term']
                if 'L' == term:
                    from_fragments.append('%s_literals as %s' %
                                          (prefix, component_name))
                    datatype = row_map[column_name + '_datatype']
                    if datatype:
                        from_fragments.append('%s_identifiers as %s_datatype' %
                                              (prefix, component_name))
                        columns.append('%s_datatype.lexical as %s_datatype' %
                                       (component_name, column_name))
                        where_fragments.append(component_name +
                                               '_datatype.id = %s')
                        substitutions.append(datatype)
                else:
                    from_fragments.append('%s_identifiers as %s' %
                                          (prefix, component_name))

            query = ('select\n%s\nfrom\n%s\nwhere\n%s\n' %
                     (', '.join(columns), ',\n'.join(from_fragments),
                      ' and '.join(where_fragments)))
            if self.debug:
                print >> sys.stderr, query, substitutions
            preparation_cursor.execute(query, substitutions)
            prepared_map = dict(
                zip([
                    description[0]
                    for description in preparation_cursor.description
                ], preparation_cursor.fetchone()))

            # Unwrap the elements of `variable_columns`, which provide the
            # original SPARQL variable names and the corresponding SQL column
            # names and management information.  Then map these SPARQL
            # variable names to the correct RDFLib node objects, using the
            # lexical information obtained using the query above.
            new_row = {}
            for varname, column_name, is_object, cluster in variable_columns:
                aVariable = Variable(varname)
                lexical = prepared_map[column_name]
                term = row_map[column_name + '_term']

                if 'L' == term:
                    datatype = prepared_map.get(column_name + '_datatype',
                                                None)
                    if datatype:
                        datatype = URIRef(datatype)
                    language = row_map[column_name + '_language']
                    node = Literal(lexical, datatype=datatype, lang=language)
                elif 'B' == term:
                    node = BNode(lexical)
                elif 'U' == term:
                    node = URIRef(lexical)
                else:
                    raise ValueError('Unknown term type ' + term)

                new_row[aVariable] = node

            return new_row
Example #27
                            self.add((p, t, s))

                else:  # if the term does not exist in the graph, add it, and the references to the statement.
                    # t gets used as a predicate, create identifier accordingly (AKA can't be a BNode)
                    h = md5(word.encode('utf-8'))
                    h.update(s.encode('utf-8'))
                    h.update(p.encode('utf-8'))
                    t = self.text_index["term_%s" % h.hexdigest()]
                    self.add((t, self.term, word))
                    self.add((t, self.termin, s))
                    self.add((p, t, s))

    def unindex(self, (s, p, o)):
        if type(o) is Literal:
            for word in stopper(splitter(o)):
                word = Literal(word)
                if self.value(predicate=self.term, object=word, any=True):
                    for t in self.triples((None, self.term, word)):
                        t = t[0]
                        if (t, self.termin, s) in self:
                            self.remove((t, self.termin, s))
                        if (p, t, s) in self:
                            self.remove((p, t, s))

    def terms(self):
        """ Returns the set of all term literals in the index graph. """
        return set(self.objects(None, self.term))

    def term_strings(self):
        """ Return the set of term strings. """
        return set([str(i) for i in self.terms()])
Example #28
File: surnia.py Project: weyls/swap
def runTests(store, resultStore):

    resultStore.add((system, RDFS["label"], Literal("Surnia")))
    resultStore.add((
        system, RDFS["comment"],
        Literal(
            """Surnia is an OWL Full reasoner using Python (including librdf) for language translation, OTTER for inference, and custom axioms.
    """)))

    for testType in (testTypes):
        print
        print "Trying each", testType, "..."
        print
        tests = []
        for s in store.subjects(TYPE, OTEST[testType]):
            tests.append(s)
        for s in store.subjects(TYPE, RTEST[testType]):
            tests.append(s)
        tests.sort()
        for s in tests:

            if failed > maxFailed:
                print
                print "maxFailed reached; testing aborted."
                return

            name = str(s)
            if name.startswith("http://www.w3.org/2002/03owlt/"):
                name = name[len("http://www.w3.org/2002/03owlt/"):]
            if name.startswith("http://www.w3.org/2000/10/rdf-tests/rdfcore/"):
                name = "rdfcore-" + name[
                    len("http://www.w3.org/2000/10/rdf-tests/rdfcore/"):]
            creator = only(store.objects(s, DC["creator"]))
            status = only(store.objects(s, RTEST["status"]))

            if str(status) == "OBSOLETED":
                continue
            if str(status) != "APPROVED":
                continue

            #print "%-40s %-20s %s" % (name, creator, status)

            print s,

            if str(s) in skip:
                print "skipping because '%s'" % skip[str(s)]
                continue

            if testType == "PositiveEntailmentTest":
                pdoc = only(store.objects(s, RTEST["premiseDocument"]))
                cdoc = only(store.objects(s, RTEST["conclusionDocument"]))
                if (cdoc, RDF["type"], RTEST["False-Document"]) in store:
                    # concluding a False-Document is the same as just
                    # being inconsistent
                    cdoc = None
                run(store, s, name, pdoc, cdoc, "Inconsistent", resultStore)
            elif testType == "NegativeEntailmentTest":
                pdoc = only(store.objects(s, RTEST["premiseDocument"]))
                cdoc = only(store.objects(s, RTEST["conclusionDocument"]))
                if (cdoc, RDF["type"], RTEST["False-Document"]) in store:
                    # concluding a False-Document is the same as just
                    # being inconsistent
                    cdoc = None
                run(store, s, name, pdoc, cdoc, "Consistent", resultStore)
            elif testType == "InconsistencyTest":
                idoc = only(store.objects(s, RTEST["inputDocument"]))
                run(store, s, name, idoc, None, "Inconsistent", resultStore)
            elif testType == "ConsistencyTest":
                idoc = only(store.objects(s, RTEST["inputDocument"]))
                run(store, s, name, idoc, None, "Consistent", resultStore)
            else:
                print "skipped, unsupported test type"
Example #29
def generate_literal(node, graph, subject, state):
    """Generate the literal the C{@property}, taking into account datatype, etc.
	Note: this method is called only if the C{@property} is indeed present, no need to check. The
	C{@content} property is also treated on the caller side.

	This method is an encoding of the algorithm documented
	U{task force's wiki page<http://www.w3.org/2006/07/SWD/wiki/RDFa/LiteralObject>}.

	@param node: DOM element node
	@param graph: the (RDF) graph to add the properties to
	@param subject: the RDFLib URIRef serving as a subject for the generated triples
	@param state: the current state to be used for the CURIE-s
	@type state: L{State.ExecutionContext}
	@return: whether a triple has been added to the graph or not
	@rtype: Boolean
	"""
    def _get_literal(Pnode):
        """
		Get (recursively) the full text from a DOM Node.

		@param Pnode: DOM Node
		@return: string
		"""
        rc = ""
        for node in Pnode.childNodes:
            if node.nodeType == node.TEXT_NODE:
                rc = rc + node.data
            elif node.nodeType == node.ELEMENT_NODE:
                rc = rc + _get_literal(node)

        # The decision of the group in February 2008 is not to normalize the result by default.
        # This is reflected in the default value of the option
        if state.options.space_preserve:
            return rc
        else:
            return re.sub(r'(\r| |\n|\t)+', " ", rc).strip()

    # end getLiteral

    def _get_XML_literal(Pnode):
        """
		Get (recursively) the XML Literal content of a DOM Node. (Most of the processing is done
		via a C{node.toxml} call of the xml minidom implementation.)

		@param Pnode: DOM Node
		@return: string
		"""
        def collectPrefixes(prefixes, node):
            def addPf(prefx, string):
                pf = string.split(':')[0]
                if pf != string and pf not in prefx: prefx.append(pf)

            # end addPf

            # first the local name of the node
            addPf(prefixes, node.tagName)
            # get all the attributes and children
            for child in node.childNodes:
                if child.nodeType == node.ELEMENT_NODE:
                    collectPrefixes(prefixes, child)
                elif child.nodeType == node.ATTRIBUTE_NODE:
                    addPf(prefixes, child.name)

        # end collectPrefixes

        rc = ""
        prefixes = []
        for node in Pnode.childNodes:
            if node.nodeType == node.ELEMENT_NODE:
                collectPrefixes(prefixes, node)

        for node in Pnode.childNodes:
            if node.nodeType == node.TEXT_NODE:
                rc = rc + node.data
            elif node.nodeType == node.ELEMENT_NODE:
                # Decorate the element with namespaces and lang values
                for prefix in prefixes:
                    if prefix in state.ns and not node.hasAttribute(
                            "xmlns:%s" % prefix):
                        node.setAttribute("xmlns:%s" % prefix,
                                          "%s" % state.ns[prefix])
                # Set the default namespace, if not done (and is available)
                if not node.getAttribute("xmlns") and state.defaultNS != None:
                    node.setAttribute("xmlns", state.defaultNS)
                # Get the lang, if necessary
                if not node.getAttribute("xml:lang") and state.lang != None:
                    node.setAttribute("xml:lang", state.lang)
                rc = rc + node.toxml()
        return rc
        # If XML Literals must be canonicalized for space, then this is the return line:
        #return re.sub(r'(\r| |\n|\t)+'," ",rc).strip()

    # end getXMLLiteral

    retval = False
    # Get the Property URI-s
    props = state.get_resources(node.getAttribute("property"), prop=True)

    # Get, if exists, the value of @datatype
    datatype = ''
    dtset = False
    if node.hasAttribute("datatype"):
        dtset = True
        dt = node.getAttribute("datatype")
        if dt != "":
            datatype = state.get_resource(dt)

    if state.lang != None:
        lang = state.lang
    else:
        lang = ''

    # The simple case: separate @content attribute
    if node.hasAttribute("content"):
        val = node.getAttribute("content")
        object = Literal(node.getAttribute("content"),
                         datatype=datatype,
                         lang=lang)
        # The value of datatype has been set, and the keyword parameters take care of the rest
    else:
        # see if there *is* a datatype (even if it is empty!)
        if dtset:
            # yep. The Literal content is the pure text part of the current element:
            # We have to check whether the specified datatype is, in fact, and
            # explicit XML Literal
            if datatype == XMLLiteral:
                object = Literal(_get_XML_literal(node), datatype=XMLLiteral)
            else:
                object = Literal(_get_literal(node),
                                 datatype=datatype,
                                 lang=lang)
        else:
            # no controlling @datatype. We have to see if there is markup in the contained
            # element
            if True in [
                    n.nodeType == node.ELEMENT_NODE for n in node.childNodes
            ]:
                # yep, and XML Literal should be generated
                object = Literal(_get_XML_literal(node), datatype=XMLLiteral)
            else:
                object = Literal(_get_literal(node), lang=lang)

    # The object may be empty, for example in an ill-defined <meta> element...
    if object != "":
        for prop in props:
            retval = True
            graph.add((subject, prop, object))

    return retval
Example #30
 print "Image Annotation Tool"
 print "Enter the URI of a photo to annotate:"
 uri = raw_input("> ")
 mem_model.add((URIRef(uri),
                URIRef("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"),
                URIRef("http://xmlns.com/foaf/0.1/Image")))
 print "Should I add a thumbnail for this image, by adding '.sized' before the extension?"
 thumbnail = raw_input("> ")
 if thumbnail:
     thumb = "%ssized.%s" % (uri[:-3], uri[-3:])
     mem_model.add((URIRef(uri), FOAF['thumbnail'], URIRef(thumb)))
 print "Enter a title for the photo:"
 title = raw_input("> ")
 mem_model.add(
     (URIRef(uri), URIRef("http://purl.org/dc/elements/1.1/title"),
      Literal(title)))
 print "Enter a description for the photo:"
 description = raw_input("> ")
 mem_model.add(
     (URIRef(uri), URIRef("http://purl.org/dc/elements/1.1/description"),
      Literal(description)))
 while 1:
     print "Photo Creator?"
     person = raw_input("> ")
     if not person: break  # stop before querying with an empty name
     people = get_people(person)
     count = 0
     people_array = {}
     if len(people) > 1:
         for i in people.keys():
             count += 1