Exemplo n.º 1
0
 def discussion_as_graph(self, discussion_id):
     """Assemble a ConjunctiveGraph with the RDF data of one discussion.

     The subjects exported are: every resource the store reports as
     ``assembl:in_conversation`` of the discussion, the discussion itself,
     one profile URI per participant and one ``AgentAccount/<id>``
     pseudo-account URI per participant. ``add_subject_data`` copies their
     data; afterwards ``sioc:account_of`` statements pointing at those
     profiles and ``catalyst:expressesIdea`` statements whose object is in
     the discussion are added, each in the graph it was found in.

     :param discussion_id: id of the discussion to export.
     :return: the populated ConjunctiveGraph.
     """
     from assembl.models import Discussion, AgentProfile
     base_uri = self.local_uri()
     discussion = Discussion.get(discussion_id)
     storage_name = self.discussion_storage_name()
     graph_iri = URIRef(self.discussion_graph_iri())
     store = get_virtuoso(self.session, storage_name)
     discussion_uri = URIRef(
         Discussion.uri_generic(discussion_id, base_uri))

     # Everything explicitly attached to the conversation, plus the
     # conversation itself.
     member_rows = store.query(
         """SELECT DISTINCT ?s WHERE {
         ?s assembl:in_conversation %s }""" % (discussion_uri.n3()))
     subjects = [subject for (subject,) in member_rows]
     subjects.append(discussion_uri)

     # Participants: their profiles and the matching pseudo-accounts.
     participant_ids = list(discussion.get_participants(True))
     profiles = {URIRef(AgentProfile.uri_generic(participant_id, base_uri))
                 for participant_id in participant_ids}
     subjects.extend(profiles)
     pseudo_accounts = (
         URIRef("%sAgentAccount/%d" % (base_uri, participant_id))
         for participant_id in participant_ids)
     subjects.extend(pseudo_accounts)

     cg = ConjunctiveGraph(identifier=graph_iri)
     self.add_subject_data(store, cg, subjects)

     # Relationships of the real (non-pseudo) accounts; only those whose
     # profile belongs to a participant are kept.
     account_links = store.triples((None, SIOC.account_of, None))
     for ((account, _predicate, profile), context) in account_links:
         if profile not in profiles:
             continue
         cg.add((account, SIOC.account_of, profile, context))
         # Tempting: simplify with this.
         # cg.add((profile, FOAF.account, account, context))

     expression_rows = store.query(
             '''SELECT ?s ?o ?g WHERE {
             GRAPH ?g {?s catalyst:expressesIdea ?o } .
             ?o assembl:in_conversation %s }''' % (discussion_uri.n3()))
     for (source, target, context) in expression_rows:
         cg.add((source, CATALYST.expressesIdea, target, context))
     return cg
Exemplo n.º 2
0
    def delete_individual_recursive(self, uri):
        """Soft-delete an individual and everything reachable below it.

        Starting from ``uri``, the individuals returned by
        ``get_individual_children`` are visited depth-first. For each one,
        all triples where it is the object or the subject are removed from
        the in-memory graph, and the matching ``knowledge`` rows of this
        subgraph are marked by writing the N3 form of the *root* ``uri``
        into their ``deleted`` column, so one call shares one marker.

        :param uri: individual to delete, as a plain ``str`` or an rdflib
            term.
        :return: list of the deleted individuals' URIs as strings, in visit
            order, each listed once.
        """
        # Exact type check on purpose: rdflib terms subclass ``str`` and
        # must be left as they are; only plain strings are wrapped.
        if type(uri) is str:
            uri = URIRef(uri)

        db = get_db()
        db_cursor = db.cursor()
        deletion_marker = uri.n3()

        stack = [uri]
        deleted = []
        visited = set()
        while stack:
            u = stack.pop()
            if u in visited:
                # Pushed by more than one parent before its incoming links
                # were removed; it has already been processed.
                continue
            visited.add(u)
            deleted.append(str(u))
            # Children must be collected before the outgoing links go away.
            stack += self.get_individual_children(u)

            # remove links from parents
            self.graph.remove((None, None, u))
            db_cursor.execute(
                'UPDATE knowledge SET deleted = ? WHERE subgraph_id = ? AND object = ? AND deleted IS NULL',
                (deletion_marker, self.id, u.n3()))

            # remove links to children
            self.graph.remove((u, None, None))
            db_cursor.execute(
                'UPDATE knowledge SET deleted = ? WHERE subgraph_id = ? AND subject = ? AND deleted IS NULL',
                (deletion_marker, self.id, u.n3()))
        db.commit()

        self.root = None  # forces a rebuild of the root entity

        return deleted
Exemplo n.º 3
0
class Context(GraphOperations):
    """A named set of statements attached to a parent store.

    When the parent is buffered, statements are staged in a local ``Graph``
    and pushed to the parent's graph with SPARQL updates on ``save``;
    otherwise the context works directly on the parent's graph context for
    this identifier.
    """

    def __init__(self, parent, identifier=None, meta=None):
        """Create the context; a fresh ``BNode`` names it when no
        ``identifier`` is given."""
        self.parent = parent
        self.identifier = URIRef(BNode() if identifier is None else identifier)
        self.meta = MetaData(self, meta)
        self.meta.generate()

    @property
    def graph(self):
        """The graph holding this context's statements, created lazily."""
        if getattr(self, '_graph', None) is None:
            if self.parent.buffered:
                staged = Graph(identifier=self.identifier)
            else:
                staged = self.parent.graph.get_context(self.identifier)
            self._graph = staged
        return self._graph

    def add(self, schema, data):
        """ Stage ``data`` as a set of statements, based on the given
        ``schema`` definition, and return the URI produced for it. """
        uri, statements = triplify(self.get_binding(schema, data))
        for statement in statements:
            self.graph.add(statement)
        return uri

    def save(self):
        """ Transfer the statements in this context over to the main store.

        Unbuffered contexts already live in the main store, so only the
        metadata is regenerated for them. """
        if not self.parent.buffered:
            self.meta.generate()
            return
        template = """
                INSERT DATA { GRAPH %s { %s } }
            """
        statement = template % (self.identifier.n3(),
                                self.graph.serialize(format='nt'))
        self.parent.graph.update(statement)
        self.flush()

    def delete(self):
        """ Delete all statements matching the current context identifier
        from the main store. """
        if not self.parent.buffered:
            self.graph.remove((None, None, None))
            return
        self.parent.graph.update(
            'CLEAR SILENT GRAPH %s ;' % self.identifier.n3())
        self.flush()

    def flush(self):
        """ Drop the local graph (and with it any pending statements)
        without transferring anything to the main store. """
        self._graph = None

    def __str__(self):
        return self.identifier

    def __repr__(self):
        return '<Context("%s")>' % self.identifier
Exemplo n.º 4
0
 def discussion_as_graph(self, discussion_id):
     """Assemble a ConjunctiveGraph with the RDF data of one discussion.

     The subjects exported are: every resource the store reports as
     ``assembl:in_conversation`` of the discussion, the discussion itself,
     one profile URI per participant and one ``AgentAccount/<id>``
     pseudo-account URI per participant. ``add_subject_data`` copies their
     data; afterwards ``sioc:account_of`` statements pointing at those
     profiles and ``catalyst:expressesIdea`` statements whose object is in
     the discussion are added, each in the graph it was found in.

     :param discussion_id: id of the discussion to export.
     :return: the populated ConjunctiveGraph.
     """
     from assembl.models import Discussion, AgentProfile
     base_uri = self.local_uri()
     discussion = Discussion.get(discussion_id)
     storage_name = self.discussion_storage_name()
     graph_iri = URIRef(self.discussion_graph_iri())
     store = get_virtuoso(self.session, storage_name)
     discussion_uri = URIRef(
         Discussion.uri_generic(discussion_id, base_uri))

     # Everything explicitly attached to the conversation, plus the
     # conversation itself.
     member_rows = store.query(
         """SELECT DISTINCT ?s WHERE {
         ?s assembl:in_conversation %s }""" % (discussion_uri.n3()))
     subjects = [subject for (subject,) in member_rows]
     subjects.append(discussion_uri)

     # Participants: their profiles and the matching pseudo-accounts.
     participant_ids = list(discussion.get_participants(True))
     profiles = {URIRef(AgentProfile.uri_generic(participant_id, base_uri))
                 for participant_id in participant_ids}
     subjects.extend(profiles)
     pseudo_accounts = (
         URIRef("%sAgentAccount/%d" % (base_uri, participant_id))
         for participant_id in participant_ids)
     subjects.extend(pseudo_accounts)

     cg = ConjunctiveGraph(identifier=graph_iri)
     self.add_subject_data(store, cg, subjects)

     # Relationships of the real (non-pseudo) accounts; only those whose
     # profile belongs to a participant are kept.
     account_links = store.triples((None, SIOC.account_of, None))
     for ((account, _predicate, profile), context) in account_links:
         if profile not in profiles:
             continue
         cg.add((account, SIOC.account_of, profile, context))
         # Tempting: simplify with this.
         # cg.add((profile, FOAF.account, account, context))

     expression_rows = store.query(
             '''SELECT ?s ?o ?g WHERE {
             GRAPH ?g {?s catalyst:expressesIdea ?o } .
             ?o assembl:in_conversation %s }''' % (discussion_uri.n3()))
     for (source, target, context) in expression_rows:
         cg.add((source, CATALYST.expressesIdea, target, context))
     return cg
Exemplo n.º 5
0
 def __new__(cls, uri_str):
     """Check that ``uri_str`` can be rendered as an N3 URI.

     Note that the argument itself is returned, not an instance of
     ``cls``: the call acts as a validating pass-through.

     :raises ValueError: if rendering the URI with ``n3()`` fails.
     """
     candidate = URIRef(uri_str)
     try:
         candidate.n3()
     except Exception:
         raise ValueError(f'{uri_str} does not look like a valid URI')
     else:
         return uri_str
Exemplo n.º 6
0
    def discussion_as_graph(self, discussion_id):
        """Copy the statements about a discussion into a ConjunctiveGraph.

        For the discussion and every subject that is
        ``assembl:in_conversation`` of it, all predicate/object pairs found
        in the discussion graph are copied. ``catalyst:expressesIdea``
        statements whose object is in the discussion are added as well, in
        the graph they were found in.

        :param discussion_id: id of the discussion to export.
        :return: the populated ConjunctiveGraph.
        """
        self.ensure_discussion_storage(None)
        from assembl.models import Discussion
        storage_name = self.discussion_storage_name()
        graph_iri = URIRef(self.discussion_graph_iri())
        store = get_virtuoso(self.session, storage_name)
        discussion_uri = URIRef(
            Discussion.uri_generic(discussion_id, self.local_uri()))

        # Rows are kept as one-element sequences; the discussion itself is
        # appended in the same shape so a single loop can unpack them all.
        subject_rows = list(store.query(
            """SELECT DISTINCT ?s WHERE {
            ?s assembl:in_conversation %s }""" % (discussion_uri.n3())))
        subject_rows.append([discussion_uri])
        # print len(subject_rows)

        cg = ConjunctiveGraph(identifier=graph_iri)
        graph_n3 = graph_iri.n3()
        for (subject,) in subject_rows:
            # Absurdly slow. DISTINCT speeds up a lot, but I get numbers.
            details = store.query(
                'SELECT ?p ?o WHERE { graph %s { %s ?p ?o }}' % (
                    graph_n3, subject.n3()))
            for predicate, value in details:
                cg.add((subject, predicate, value))

        expression_rows = store.query(
                '''SELECT ?s ?o ?g WHERE {
                GRAPH ?g {?s catalyst:expressesIdea ?o } .
                ?o assembl:in_conversation %s }''' % (discussion_uri.n3()))
        for (source, target, context) in expression_rows:
            cg.add((source, CATALYST.expressesIdea, target, context))

        # TODO: Add roles

        return cg
Exemplo n.º 7
0
    def _compare_pointers(self, synset: dict, pointer_name: str,
                          pointer_uri: URIRef):
        """Compare one kind of pointer between the dump and the graph.

        The (source, target) pairs listed under ``synset[pointer_name]``
        are matched against the (source word, target word) pairs the graph
        holds for ``pointer_uri`` on the same synset.

        :param synset: dump document; must contain ``"doc_id"``.
        :param pointer_name: key of the pointer list inside ``synset``.
        :param pointer_uri: predicate linking source and target senses.
        :return: ``(compare, pairs, pairsd, pairso)`` where ``compare`` is
            True only if every pair matched, ``pairs`` are the matched
            pairs, ``pairsd`` those left only in the dump and ``pairso``
            those found only in the graph.
        """
        matched = []
        graph_only = []
        dump_only = []

        # Pairs the dump declares with their source in this synset.
        for pointer in synset.get(pointer_name, ()):
            source = self._get_source_target(synset, pointer,
                                             "source_word")
            target_synset = self.docs[pointer["target_synset"]]
            target = self._get_source_target(target_synset, pointer,
                                             "target_word")
            # Pointers with an unresolved end are left out.
            if source and target:
                dump_only.append((source, target))

        # Pairs the graph holds for the same synset and pointer.
        synset_uri = SYNSETPT[synset["doc_id"]]
        query = ("SELECT ?ss ?sw ?swl ?ts ?tw ?twl WHERE{{"
                 "?ss {pointer} ?ts ."
                 "{synset} {hassens} ?ss ."
                 "?ss {hasword} ?sw . ?sw {lexical} ?swl ."
                 "?ts {hasword} ?tw . ?tw {lexical} ?twl . }}")
        terms = dict(synset=synset_uri.n3(),
                     hassens=SCHEMA.containsWordSense.n3(),
                     pointer=pointer_uri.n3(),
                     hasword=SCHEMA.word.n3(),
                     lexical=SCHEMA.lexicalForm.n3())
        result = self.graph.query(query.format(**terms))

        # Only the two lexical forms of each row are compared.
        for _, _, source_form, _, _, target_form in result:
            pair = (source_form.toPython().strip(),
                    target_form.toPython().strip())
            if pair in dump_only:
                # Each dump entry can be consumed by one graph row only.
                dump_only.remove(pair)
                matched.append(pair)
            else:
                graph_only.append(pair)

        # Equal only when nothing is left over on either side.
        compare = not dump_only and not graph_only

        return compare, matched, dump_only, graph_only
Exemplo n.º 8
0
def instance_view_jsonld(request):
    """Return the statements about the context's instance as JSON-LD.

    The caller must hold READ permission on the instance; otherwise an
    ``HTTPUnauthorized`` response is returned. ``HTTPNotFound`` is raised
    when the context has no discussion. Statements whose object contains
    ``_with_no_name_entry`` are left out.
    """
    from assembl.semantic.virtuoso_mapping import AssemblQuadStorageManager
    from rdflib import URIRef, ConjunctiveGraph
    ctx = request.context
    user_id = authenticated_userid(request) or Everyone
    permissions = get_permissions(user_id, ctx.get_discussion_id())
    instance = ctx._instance
    if not instance.user_can(user_id, CrudPermissions.READ, permissions):
        return HTTPUnauthorized()
    discussion = ctx.get_instance_of_class(Discussion)
    if not discussion:
        raise HTTPNotFound()

    aqsm = AssemblQuadStorageManager()
    # NOTE(review): [6:] presumably strips a 6-character scheme prefix such
    # as "local:" from the instance URI -- confirm against ``uri()``.
    uri = URIRef(aqsm.local_uri() + instance.uri()[6:])
    storage_name = aqsm.discussion_storage_name(discussion.id)
    store = get_virtuoso(instance.db, storage_name)
    cg = ConjunctiveGraph(store, storage_name)

    #result = v.query('select ?p ?o ?g where {graph ?g {<%s> ?p ?o}}' % uri)
    # Something is wrong here.
    statements = []
    for (_subject, predicate, value) in cg.triples((uri, None, None)):
        if '_with_no_name_entry' in value:
            continue
        statements.append(
            '%s %s %s.' % (uri.n3(), predicate.n3(), value.n3()))
    return aqsm.quads_to_jsonld('\n'.join(statements))
Exemplo n.º 9
0
def export_dataset(
        sparql_endpoint: SPARQLEndpoint = None,
        s3_endpoint: S3ObjectStore = None,
        data_source_code: str = None,
        graph_iri: rdflib.URIRef = None,
        mime=MIME_NTRIPLES
) -> bool:
    """Stream one named graph from the SPARQL endpoint into the S3 store.

    The graph is fetched with an HTTP GET (``graph`` query parameter,
    ``Accept`` set to ``mime``) and forwarded chunk by chunk to a
    multi-part uploader, so the dataset is never held in memory as a whole.

    :param sparql_endpoint: endpoint providing URL, credentials and error
        handling for the download.
    :param s3_endpoint: object store providing the uploader.
    :param data_source_code: dataset code, used for logging and for the
        name of the uploaded object.
    :param graph_iri: IRI of the named graph to export; despite the
        ``None`` default it is required (``graph_iri.n3()`` is called).
    :param mime: MIME type requested from the endpoint and recorded on the
        upload.
    :return: whatever ``uploader.complete()`` returns.
    """
    log_rule(data_source_code)
    log_item("Exporting Dataset", data_source_code)
    log_item("Named Graph IRI", graph_iri.n3())
    # A streamed response keeps its connection checked out until it is
    # fully consumed or closed; the ``with`` block guarantees the release
    # even when error handling or the upload raises.
    with requests.get(
        sparql_endpoint.endpoint_url(),
        auth=(sparql_endpoint.user_id(), sparql_endpoint.password()),
        params={'graph': graph_iri},
        headers={'Accept': mime},
        stream=True
    ) as r:
        sparql_endpoint.handle_error(r)
        # Passed on to the uploader together with the raw chunks.
        content_encoding = r.headers.get('Content-Encoding')
        # log_item("Content Encoding", content_encoding)
        # log_item("Response Encoding", r.encoding)

        # Computed once so the logged name and the upload key cannot differ.
        s3_key = _s3_file_name(mime, data_source_code)
        log_item('Uploading as', s3_key)
        uploader = s3_endpoint.uploader_for(
            key=s3_key,
            mime=mime,
            content_encoding=content_encoding,
            dataset_code=data_source_code
        )
        chunk_size = 5 * 1024 * 1024  # 5Mb is minimum
        for chunk in iter_raw(r, chunk_size=chunk_size):
            uploader.part(chunk)
        return uploader.complete()
Exemplo n.º 10
0
 def bulk_update(self, named_graph, graph, size, is_add=True):
     """
     Insert into, or delete from, a named graph in batches.

     The triples of ``graph`` are split by ``nt_yielder`` into groups of
     ``size`` and each group is sent as its own SPARQL update, which keeps
     the API endpoint from timing out. Statements are inserted only when
     ``is_add`` is exactly ``True``; any other value deletes them.

     Returns the number of triples in ``graph``.
     """
     context = URIRef(named_graph)
     total = len(graph)
     if total <= 0:
         return total

     if is_add is True:
         log_template = "Adding {} statements to <{}>."
         update_template = u'INSERT DATA { GRAPH %s { %s } }'
     else:
         log_template = "Removing {} statements from <{}>."
         update_template = u'DELETE DATA { GRAPH %s { %s } }'

     for set_size, nt in self.nt_yielder(graph, size):
         logger.debug(log_template.format(set_size, named_graph))
         self.update(update_template % (context.n3(), nt))
     return total
Exemplo n.º 11
0
    def get_synthesis_contributors(self, id_only=True):
        """Return the authors of important extracts on this idea.

        Extracts are looked up through SPARQL: annotations expressing this
        idea or any idea reachable from it through ``idea:includes*``. The
        authors of the posts carrying those extracts are ordered by
        descending count of important extracts.

        :param id_only: when true, return ``AgentProfile.uri_generic``
            values; otherwise return the ``AgentProfile`` objects.
        :return: list ordered from most to least contributing agent.
        """
        from .idea_content_link import Extract
        from .auth import AgentProfile
        from .post import Post
        from sqlalchemy.sql.functions import count
        local_uri = AssemblQuadStorageManager.local_uri()
        discussion_storage = \
            AssemblQuadStorageManager.discussion_storage_name()

        idea_uri = URIRef(self.uri(local_uri))
        clause = '''select distinct ?annotation where {
            %s idea:includes* ?ideaP .
            ?annotation assembl:resourceExpressesIdea ?ideaP }'''
        sparql = SparqlClause(
            clause % (idea_uri.n3(),),
            quad_storage=discussion_storage.n3())
        extract_ids = [
            extract_id for (extract_id,) in self.db.execute(sparql)]

        # NOTE(review): the count is selected without a visible GROUP BY;
        # confirm the per-agent grouping is what the backend produces.
        counts = self.db.query(AgentProfile.id, count(Extract.id))
        counts = counts.join(Post, Post.creator_id == AgentProfile.id)
        counts = counts.join(Extract)
        counts = counts.filter(
            Extract.important == True,  # noqa: E712 (SQL expression)
            Extract.id.in_(extract_ids))
        ranked = sorted(counts, key=lambda row: row[1], reverse=True)

        if id_only:
            return [AgentProfile.uri_generic(agent_id)
                    for (agent_id, _extract_count) in ranked]

        agent_ids = [agent_id for (agent_id, _extract_count) in ranked]
        # Position of each agent in the ranking, to restore that order on
        # the objects fetched below.
        position = {}
        for rank, agent_id in enumerate(agent_ids):
            position[agent_id] = rank
        agents = self.db.query(AgentProfile).filter(
            AgentProfile.id.in_(agent_ids)).all()
        agents.sort(key=lambda agent: position[agent.id])
        return agents
Exemplo n.º 12
0
def view(rtype, rid):
    """Serve one resource of type ``rtype`` in a negotiated format.

    An optional ``.suffix`` on ``rid`` selects the format. The query
    template ``queries/<rtype>.rq`` is rendered for the resource URI; for
    format ``rq`` the query text itself is returned, otherwise the query
    is run against the configured endpoint and the resulting graph is
    rendered through ``<rtype>.html`` (html/xhtml) or serialized.
    """
    stem, dot, extension = rid.rpartition('.')
    if dot:
        rid, suffix = stem, extension
    else:
        suffix = None
    path = rtype + '/' + rid

    uri = URIRef(app.config['RESOURCE_BASE'] + path)
    services = app.config['SERVICES']
    rq = render_template("queries/%s.rq" % rtype,
            prefixes=RQ_PREFIXES, this=uri.n3(), services=services)

    fmt = _conneg_format(suffix)
    if fmt == 'rq':
        # The caller asked for the query, not for its result.
        return rq, 200, {'Content-Type': 'text/plain'}

    # Alternative considered by the original author: fetch a pre-built
    # ``<path>.n3`` document instead of running the query.
    res = run_query(app.config['ENDPOINT'], rq)
    graph = to_graph(res.content)
    this = graph.resource(uri)

    if fmt in ('html', 'xhtml'):
        return render_template(rtype + '.html',
                path=path, this=this, curies=graph.qname)

    headers = {'Content-Type': MIMETYPES.get(fmt) or 'text/plain'}
    # Translate file-style suffixes into serializer format names.
    serializer_names = {'rdf': 'xml', 'ttl': 'turtle'}
    fmt = serializer_names.get(fmt) or fmt
    return graph.serialize(format=fmt), 200, headers
Exemplo n.º 13
0
def instance_view_jsonld(request):
    """Return the statements about the context's instance as JSON-LD.

    The caller must hold READ permission on the instance; otherwise an
    ``HTTPUnauthorized`` response is returned. ``HTTPNotFound`` is raised
    when the context has no discussion. Statements whose object contains
    ``_with_no_name_entry`` are left out.
    """
    from assembl.semantic.virtuoso_mapping import AssemblQuadStorageManager
    from rdflib import URIRef, ConjunctiveGraph
    ctx = request.context
    user_id = authenticated_userid(request) or Everyone
    permissions = get_permissions(user_id, ctx.get_discussion_id())
    instance = ctx._instance
    if not instance.user_can(user_id, CrudPermissions.READ, permissions):
        return HTTPUnauthorized()
    discussion = ctx.get_instance_of_class(Discussion)
    if not discussion:
        raise HTTPNotFound()

    aqsm = AssemblQuadStorageManager()
    # NOTE(review): [6:] presumably strips a 6-character scheme prefix such
    # as "local:" from the instance URI -- confirm against ``uri()``.
    uri = URIRef(aqsm.local_uri() + instance.uri()[6:])
    storage_name = aqsm.discussion_storage_name(discussion.id)
    store = get_virtuoso(instance.db, storage_name)
    cg = ConjunctiveGraph(store, storage_name)

    #result = v.query('select ?p ?o ?g where {graph ?g {<%s> ?p ?o}}' % uri)
    # Something is wrong here.
    statements = []
    for (_subject, predicate, value) in cg.triples((uri, None, None)):
        if '_with_no_name_entry' in value:
            continue
        statements.append(
            '%s %s %s.' % (uri.n3(), predicate.n3(), value.n3()))
    return aqsm.quads_to_jsonld('\n'.join(statements))
Exemplo n.º 14
0
Arquivo: fsa.py Projeto: HuaiQiu/ktbs
def match_sparql_ask(transition, event, token, fsa):
    """
    The 'sparql-ask' matcher.

    The transition's condition is used as the body of a SPARQL ASK query
    run against the source obsels graph. Before the condition, three
    variables are bound:

    * ?obs   -- the considered obsel (``event``);
    * ?pred  -- the last event of the token's history;
    * ?first -- the first event of the token's history.

    ?pred and ?first are bound to the empty string when there is no token
    or no history. The prefix m: is bound to the model URI of the source
    trace (with '#' appended unless it already ends in '/' or '#'), and
    the empty prefix to the KTBS namespace.

    Returns the boolean answer of the ASK query.
    """
    model_ns = fsa.source.model_uri
    if model_ns[-1] not in ('/', '#'):
        model_ns += '#'

    history = token and token.get('history_events')
    pred = URIRef(history[-1]) if history else None
    first = URIRef(history[0]) if history else None

    user_condition = transition['condition']
    ## The correct way would be to pass
    ##   initBindings = { "obs": URIRef(event), "pred": pred, "first": first }
    ## but Virtuoso does not support VALUES clauses after the ASK clause,
    ## which is how SPARQLUpdateStore handles initBindings, so the bindings
    ## are written into the query text instead.
    bindings = """
      BIND (%s as ?obs)
      BIND (%s as ?pred)
      BIND (%s as ?first)
    """ % (
        URIRef(event).n3(),
        pred.n3() if pred else '""',  # simulating NULL
        first.n3() if first else '""',  # simulating NULL
    )
    ## thank you for nothing Virtuoso :-(

    ask_query = "ASK { %s }" % (bindings + user_condition)
    namespaces = {"": KTBS, "m": model_ns}
    # initBindings is deliberately not passed: not supported by Virtuoso.
    answer = fsa.source_obsels_graph.query(ask_query, initNs=namespaces)
    return answer.askAnswer
Exemplo n.º 15
0
class ContentProvider(object):
    """Content source backed by SPARQL queries issued through JenaHelper.

    The harvester resource named in the configuration is described once at
    construction time to pick up its originating source and group
    description, falling back to the configured literals.
    """

    #implements(IContentProvider)

    def __init__(self, config):
        """Read ``harvesteruri`` (and, as fallbacks, ``originatingsource``
        and ``groupdescription``) from ``config`` and load the item query.
        """
        self.config = config
        self.jena = JenaHelper(config)
        self.harvester = URIRef(config['harvesteruri'])
        description = self.jena.graphquery(
            "DESCRIBE %s" % self.harvester.n3())

        # Values stored on the harvester win; the configuration is only
        # consulted when the store has none.
        self.originatingSource = (
            description.value(self.harvester, ANDS.originatingSource)
            or Literal(config['originatingsource']))
        self.groupDescription = (
            description.value(self.harvester, ANDS.groupDescription)
            or Literal(config['groupdescription']))
        self.item_sparql_query = resource_string(
            __name__, "item_sparql.sparql")

    def set_logger(self, log):
        """Set the logger instance for this class
        """
        self.log = log

    def update(self, from_date=None):
        """Harvests new content added since from_date
        returns a list of content_ids that were changed/added,
        this should be called before get_contents is called

        Note that ``from_date`` is not used by the query as written.
        """
        template = resource_string(__name__, "items_to_harvest.sparql")
        parameters = {'harvester': self.harvester}
        self._content = self.jena.selectquery(template % parameters)

        return self._content

    def count(self):
        """Returns number of content objects in the repository
        returns None if number is unknown, this should not be
        called before update is called
        """
        return len(self._content)

    def get_content_ids(self):
        """returns a list/generator of content_ids
        """
        return self._content

    def get_content_by_id(self, id):
        """Return content of a specific id

        The item query is run into a graph named after ``id``; the
        originating source and group description are then added to it.
        """
        # assume id is URIRef instance
        subject = URIRef(id)
        target = Graph(identifier=subject)
        parameters = {"subject": id, 'harvester': self.harvester}
        data = self.jena.graphquery(
            self.item_sparql_query % parameters, target)
        # FIXME: make these conditional
        for predicate, value in (
                (ANDS.originatingSource, self.originatingSource),
                (ANDS.groupDescription, self.groupDescription)):
            data.add((subject, predicate, value))
        return data
Exemplo n.º 16
0
 def test_creation_with_unknown_ns(self):
     """An RDFPredicate built from a URI in an unregistered namespace
     still exists and has a label."""
     resource_uri = 'http://localhost:8000/resource/aggregation/ton-smits-huis/454'
     predicate = RDFPredicate(resource_uri)
     resource_ref = URIRef(resource_uri)
     graph = Graph()
     graph.add((resource_ref, FOAF.name, Literal("sjoerd")))
     _first_subject = list(graph.subjects())[0]
     # NOTE(review): a prefixed form from n3() without a namespace manager
     # presumably relies on project-specific behaviour -- confirm.
     assert resource_ref.n3() == "ns1:454"
     assert predicate is not None
     assert predicate.label is not None
Exemplo n.º 17
0
Arquivo: fsa.py Projeto: ktbs/ktbs
def match_sparql_ask(transition, event, token, fsa):
    """
    The 'sparql-ask' matcher.

    The transition's condition is used as the body of a SPARQL ASK query
    run against the source obsels graph. Before the condition, three
    variables are bound:

    * ?obs   -- the considered obsel (``event``);
    * ?pred  -- the last event of the token's history;
    * ?first -- the first event of the token's history.

    ?pred and ?first are bound to the empty string when there is no token
    or no history. The prefix m: is bound to the model URI of the source
    trace (with '#' appended unless it already ends in '/' or '#'), and
    the empty prefix to the KTBS namespace.

    Returns the boolean answer of the ASK query.
    """
    model_ns = fsa.source.model_uri
    if model_ns[-1] not in ('/', '#'):
        model_ns += '#'

    history = token and token.get('history_events')
    pred = URIRef(history[-1]) if history else None
    first = URIRef(history[0]) if history else None

    user_condition = transition['condition']
    ## The correct way would be to pass
    ##   initBindings = { "obs": URIRef(event), "pred": pred, "first": first }
    ## but Virtuoso does not support VALUES clauses after the ASK clause,
    ## which is how SPARQLUpdateStore handles initBindings, so the bindings
    ## are written into the query text instead.
    bindings = """
      BIND (%s as ?obs)
      BIND (%s as ?pred)
      BIND (%s as ?first)
    """ % (
        URIRef(event).n3(),
        pred.n3() if pred else '""',  # simulating NULL
        first.n3() if first else '""',  # simulating NULL
    )
    ## thank you for nothing Virtuoso :-(

    ask_query = "ASK { %s }" % (bindings + user_condition)
    namespaces = {"": KTBS, "m": model_ns}
    # initBindings is deliberately not passed: not supported by Virtuoso.
    answer = fsa.source_obsels_graph.query(ask_query, initNs=namespaces)
    return answer.askAnswer
Exemplo n.º 18
0
    def new_value(self, entity_uri, property_uri):
        """Add an empty literal as a new value of a property.

        The value is stored at the free index reported by
        ``get_property_free_index``, in the in-memory graph, in
        ``self.properties`` and in the ``knowledge`` table.

        :param entity_uri: subject, as plain ``str`` or rdflib term.
        :param property_uri: predicate, as plain ``str`` or rdflib term.
        :return: the index assigned to the new value.
        """
        # Only plain strings are wrapped; rdflib terms pass through as is.
        if type(entity_uri) is str:
            entity_uri = URIRef(entity_uri)
        if type(property_uri) is str:
            property_uri = URIRef(property_uri)

        index = self.get_property_free_index(entity_uri, property_uri)
        empty = Literal('')
        statement = (entity_uri, property_uri, empty)

        self.graph.add(statement)
        self.properties[(entity_uri, property_uri)][index] = empty

        db = get_db()
        row = (self.id,) + tuple(part.n3() for part in statement) + (index,)
        db.execute(
            'INSERT INTO knowledge (subgraph_id, subject, predicate, object, property_index) VALUES (?, ?, ?, ?, ?)',
            row)
        db.commit()

        self.root = None  # forces a rebuild of the root entity

        return index
Exemplo n.º 19
0
    def undo_delete_individual(self, uri):
        """Restore the statements soft-deleted under the marker of ``uri``.

        Every ``knowledge`` row of this subgraph whose ``deleted`` column
        equals the N3 form of ``uri`` is added back to the in-memory graph
        and has its ``deleted`` column reset to NULL.

        :param uri: URI used as deletion marker, as plain ``str`` or
            rdflib term.
        """
        if type(uri) is str:
            uri = URIRef(uri)

        db = get_db()
        # Both statements are scoped to this subgraph and this marker.
        scope = (self.id, uri.n3())

        cursor = db.execute(
            'SELECT subject, predicate, object FROM knowledge WHERE subgraph_id = ? AND deleted = ?',
            scope)
        for row in cursor.fetchall():
            self.graph.add(row_to_rdf(row))

        db.execute(
            'UPDATE knowledge SET deleted = NULL WHERE subgraph_id = ? AND deleted = ?',
            scope)

        db.commit()

        self.root = None  # forces a rebuild of the root entity
Exemplo n.º 20
0
 def check_valid_uri(self, uri):
     """Report whether ``uri`` is a string that renders as an N3 URI.

     :param uri: candidate value; anything that is not a ``str`` is
         rejected without further checks.
     :return: True when ``URIRef(uri).n3()`` succeeds and yields a string,
         False otherwise.
     """
     if not isinstance(uri, str):
         return False
     try:
         uri_out = URIRef(uri).n3()
     except Exception:
         # Any failure to build or render the URI means "not valid".
         # ``Exception`` rather than a bare ``except`` so that
         # KeyboardInterrupt and SystemExit still propagate.
         return False
     return isinstance(uri_out, str)
Exemplo n.º 21
0
 def check_valid_uri(self, uri):
     """Report whether ``uri`` is a string that renders as an N3 URI.

     :param uri: candidate value; anything that is not a ``str`` is
         rejected without further checks.
     :return: True when ``URIRef(uri).n3()`` succeeds and yields a string,
         False otherwise.
     """
     if not isinstance(uri, str):
         return False
     try:
         uri_out = URIRef(uri).n3()
     except Exception:
         # Any failure to build or render the URI means "not valid".
         # ``Exception`` rather than a bare ``except`` so that
         # KeyboardInterrupt and SystemExit still propagate.
         return False
     return isinstance(uri_out, str)
Exemplo n.º 22
0
 def term(self, term, use_prefix=True):
     """Render ``term`` for inclusion in query text.

     * ``None`` gives ``RDF.nil``;
     * a Namespace is treated as the URIRef of the same text;
     * a value without an ``n3`` method is wrapped in a Literal first;
     * a URIRef is passed to ``self.uri`` when ``use_prefix`` is true;
     * a double/integer/float/boolean Literal is rendered as its
       lower-cased text;
     * anything else is rendered with its own ``n3()``.

     :param term: rdflib term, Namespace, plain Python value or None.
     :param use_prefix: when true, URIRefs are rendered by ``self.uri``.
     """
     if isinstance(term, Namespace):
         term = URIRef(term)
     if term is None:
         return RDF.nil
     if not hasattr(term, 'n3'):
         return self.term(Literal(term))
     if use_prefix and isinstance(term, URIRef):
         return self.uri(term)
     if isinstance(term, Literal) and term.datatype in (
             XSD.double, XSD.integer, XSD.float, XSD.boolean):
         return unicode(term).lower()
     # No separate Namespace case is needed here: Namespaces were turned
     # into URIRefs on entry, so such a branch could never be reached.
     return term.n3()
Exemplo n.º 23
0
    def get_idea_ids_showing_post(cls, post_id):
        """Given a post, give the ID of the ideas that show this message.

        The query follows ``sioc:reply_of*`` from the post, then
        ``assembl:postLinkedToIdea``, then ``idea:includes*`` backwards to
        the including ideas. Returns the results converted to ``int``.
        """
        # This works because of a virtuoso bug...
        # where DISTINCT gives IDs instead of URIs.
        from .generic import Content
        discussion_storage = \
            AssemblQuadStorageManager.discussion_storage_name()

        post_uri = URIRef(Content.uri_generic(
            post_id, AssemblQuadStorageManager.local_uri()))
        sparql = '''select distinct ?idea where {
                %s sioc:reply_of* ?post .
                ?post assembl:postLinkedToIdea ?ideaP .
                ?idea idea:includes* ?ideaP  }''' % (post_uri.n3(),)
        clause = SparqlClause(sparql, quad_storage=discussion_storage.n3())

        idea_ids = []
        for (raw_id,) in cls.default_db.execute(clause):
            idea_ids.append(int(raw_id))
        return idea_ids
Exemplo n.º 24
0
 def term(self, term, use_prefix=True):
     """Render ``term`` for inclusion in query text.

     * ``None`` gives ``RDF.nil``;
     * a Namespace is treated as the URIRef of the same text;
     * a value without an ``n3`` method is wrapped in a Literal first;
     * a URIRef is passed to ``self.uri`` when ``use_prefix`` is true;
     * a double/integer/float/boolean Literal is rendered as its
       lower-cased text;
     * anything else is rendered with its own ``n3()``.

     :param term: rdflib term, Namespace, plain Python value or None.
     :param use_prefix: when true, URIRefs are rendered by ``self.uri``.
     """
     if isinstance(term, Namespace):
         term = URIRef(term)
     if term is None:
         return RDF.nil
     if not hasattr(term, 'n3'):
         return self.term(Literal(term))
     if use_prefix and isinstance(term, URIRef):
         return self.uri(term)
     if isinstance(term, Literal) and term.datatype in (
             XSD.double, XSD.integer, XSD.float, XSD.boolean):
         return unicode(term).lower()
     # No separate Namespace case is needed here: Namespaces were turned
     # into URIRefs on entry, so such a branch could never be reached.
     return term.n3()
Exemplo n.º 25
0
    def change_label(self, entity_uri, label):
        """Replace the ``rdfs:label`` of an entity.

        All existing labels of the entity are removed from the in-memory
        graph and the new one is added; the ``knowledge`` rows holding the
        entity's label in this subgraph are updated to the new value.

        :param entity_uri: URI of the entity to relabel.
        :param label: new label text.
        """
        subject = URIRef(entity_uri)
        new_label = Literal(label)

        self.graph.remove((subject, RDFS.label, None))
        self.graph.add((subject, RDFS.label, new_label))

        db = get_db()
        statement = (
            'UPDATE knowledge SET object = ? '
            '   WHERE subgraph_id = ? AND subject = ? AND predicate = ?')
        values = (new_label.n3(), self.id, subject.n3(), RDFS.label.n3())
        db.execute(statement, values)
        db.commit()

        self.root = None  # forces a rebuild of the root entity
Exemplo n.º 26
0
    def get_user_by_uri(self, *, user_uri: URIRef) -> FoodKgUser:
        """
        Query the User KG to retrieve a FoodKGUser object with the target URI.

        :param user_uri: the URI of the user to return
        :return: a FoodKGUser object with the target URI
        """
        user_constraint = {
            "var_name": "?userUri",
            "var_values": [user_uri.n3()]
        }
        construct_query = J2QueryStrService.j2_query(
            file_name="construct_user_query",
            constraints=[user_constraint],
        )
        # NOTE(review): the returned graph is not used in this method;
        # presumably the call is made for its caching effect -- confirm.
        self.get_cache_graph(sparql=construct_query)

        return self._graph_get_user_by_uri(user_uri=user_uri)
Exemplo n.º 27
0
def canvas_and_images_graph(graph, canvas_uri):
    """Extract a canvas and its image annotations into a new Graph.

    The result holds every statement whose subject is the canvas, plus,
    for each ``oa:Annotation`` targeting the canvas whose body is typed as
    one of the image classes in the query, every statement whose subject
    is the annotation or its body.

    :param graph: source graph to read from.
    :param canvas_uri: URI of the canvas (string or URIRef).
    :return: a new Graph with the selected statements.
    """
    canvas_uri = URIRef(canvas_uri)

    extracted = Graph()
    extracted += graph.triples((canvas_uri, None, None))

    image_query = """SELECT ?image_anno ?image WHERE {
        ?image_anno a oa:Annotation .
        ?image_anno oa:hasTarget %s .
        ?image_anno oa:hasBody ?image .
        ?image a ?type .
        FILTER(?type = dcmitype:Image || ?type = dms:Image || ?type = dms:ImageChoice) .
    }""" % (canvas_uri.n3())
    matches = graph.query(image_query, initNs=ns)

    for annotation, body in matches:
        # One lookup fetches the statements of both subjects.
        subjects = [annotation, body]
        extracted += graph.triples_choices((subjects, None, None))

    return extracted
Exemplo n.º 28
0
def cli(kb, input):
    """Read ``input`` and print the second graph returned by ``read`` as N-Quads.

    NOTE(review): ``kb`` is not used anywhere in this function.
    """
    g, og = read(input)
    # Walk every quad in the store of ``g`` and run each term through ``fixup``.
    # NOTE(review): the fixed-up terms are only bound to locals (``spocc`` is
    # never read), so this loop has no visible effect unless ``fixup`` has
    # side effects -- confirm intent.
    for spo, c in g.store.triples((None, None, None), None):
        s, p, o = spo
        s = fixup(s)
        p = fixup(p)
        o = fixup(o)
        cc = URIRef(c.identifier, base=bbbb)
        spocc = (s, p, o, cc)
    # NOTE(review): this statement is outside the loop, so it formats only the
    # last quad, and ``r`` is never used afterwards. ``s``, ``p``, ``o`` and
    # ``cc`` are bound only inside the loop, so they would be unbound here if
    # the store yielded nothing.
    r = "%s %s %s %s .\n" % (s.n3(), p.n3(), o.n3(),
                             ('' if cc == bbbb else cc.n3()))

    #	print()
    #	print()
    print()
    print()
    # Print the serialized graph line by line; ``decode`` assumes that
    # ``serialize`` returned bytes -- TODO confirm for the rdflib version in use.
    for l in (og.serialize(format='nquads')).splitlines():
        print(l.decode('utf8'))
Exemplo n.º 29
0
 def term(self, term, use_prefix=True):
     """Render ``term`` as text for inclusion in a query.

     :param term: a namespace, an RDF term, ``None``, or a plain Python
         value (anything without an ``n3`` method is wrapped in ``Literal``).
     :param use_prefix: when true, URIs and literal datatypes are rendered
         through ``self.uri`` instead of ``n3()``.
     :return: the rendered text; ``RDF.nil`` itself when ``term`` is ``None``.
     """
     if isinstance(term, (Namespace, ClosedNamespace)):
         term = URIRef(namespace_to_uri(term))
     if term is None:
         return RDF.nil
     elif not hasattr(term, 'n3'):
         # Plain Python value: wrap it and render it with the caller's prefix
         # preference (the recursion used to fall back to the default, so
         # ``use_prefix=False`` could still yield a prefixed datatype).
         return self.term(Literal(term), use_prefix)
     elif use_prefix and isinstance(term, URIRef):
         return self.uri(term)
     elif isinstance(term, Literal):
         if term.datatype in (XSD.double, XSD.integer, XSD.float, XSD.boolean):
             # These datatypes are written bare and lower-cased.
             return unicode(term).lower()
         elif use_prefix and term.datatype:  # Abbreviate datatype if possible
             datatype_term = self.uri(term.datatype)
             return '"%s"^^%s' % (term, datatype_term)
     elif isinstance(term, Namespace):
         # NOTE(review): namespaces were already converted to URIRef at the
         # top, so this branch looks unreachable -- confirm before removing.
         return unicode(term)
     return term.n3()
Exemplo n.º 30
0
 def term(self, term, use_prefix=True):
     """Render ``term`` as text for inclusion in a query.

     :param term: a namespace, an RDF term, ``None``, or a plain Python
         value (anything without an ``n3`` method is wrapped in ``Literal``).
     :param use_prefix: when true, URIs and literal datatypes are rendered
         through ``self.uri`` instead of ``n3()``.
     :return: the rendered text; ``RDF.nil`` itself when ``term`` is ``None``.
     """
     if isinstance(term, (Namespace, ClosedNamespace)):
         term = URIRef(namespace_to_uri(term))
     if term is None:
         return RDF.nil
     elif not hasattr(term, 'n3'):
         # Plain Python value: wrap it and render it with the caller's prefix
         # preference (the recursion used to fall back to the default, so
         # ``use_prefix=False`` could still yield a prefixed datatype).
         return self.term(Literal(term), use_prefix)
     elif use_prefix and isinstance(term, URIRef):
         return self.uri(term)
     elif isinstance(term, Literal):
         if term.datatype in (XSD.double, XSD.integer, XSD.float,
                              XSD.boolean):
             # These datatypes are written bare and lower-cased.
             return unicode(term).lower()
         elif use_prefix and term.datatype:  # Abbreviate datatype if possible
             datatype_term = self.uri(term.datatype)
             return '"%s"^^%s' % (term, datatype_term)
     elif isinstance(term, Namespace):
         # NOTE(review): namespaces were already converted to URIRef at the
         # top, so this branch looks unreachable -- confirm before removing.
         return unicode(term)
     return term.n3()
Exemplo n.º 31
0
    def getEntity(self, reference, makeIfDoesNotExist: bool = False):
        """Look up the proxy object for an entity reference.

        Parameters
        ----------
        reference : Identifier or str
            Reference of the entity; a plain string is wrapped in ``URIRef``.

        makeIfDoesNotExist : bool
            If True and the entity is neither remembered by this API nor
            reported by ``existEntity``, ``makeEntity`` is called and its
            result returned.

        Returns
        -------
        OntoEntity or None
            The remembered proxy if it still exists, the newly made entity
            when creation was requested, otherwise None. A remembered proxy
            that no longer exists is forgotten and None is returned. An
            entity that exists in the database but is not remembered also
            yields None (building a proxy for it is still a TODO).
        """
        # make sure reference is a valid identifier
        if isinstance(reference, str):
            reference = URIRef(reference)
        cacheKey = reference.n3()

        # proxies already handed out are remembered under their N3 string
        if cacheKey in self.__objects:
            remembered = self.__objects[cacheKey]
            if remembered.exists:
                return remembered
            # the object is no longer in the DB: forget it
            del self.__objects[cacheKey]
            return None

        if self.existEntity(reference):
            # TODO: make new object but don't write into DB (it's already there)
            return None
        if makeIfDoesNotExist:
            return self.makeEntity(reference)
        return None
Exemplo n.º 32
0
    def get_contributors(self):
        """Return agent-profile identifiers of authors who posted on this idea.

        The query counts, per author, the distinct posts linked to this idea
        or to any idea reachable through ``idea:includes*``, and the distinct
        posts in their reply threads. Authors are returned in descending
        order of ``(linked-post count, post count, profile id)``.
        """
        # anyone who contributed to any of the idea's posts
        base_uri = AssemblQuadStorageManager.local_uri()
        storage = AssemblQuadStorageManager.discussion_storage_name()

        idea_ref = URIRef(self.uri(base_uri))
        clause = '''select count(distinct ?postP), count(distinct ?post), ?author where {
            %s idea:includes* ?ideaP .
            ?postP assembl:postLinkedToIdea ?ideaP  .
            ?post sioc:reply_of* ?postP .
            ?post sioc:has_creator ?author }'''
        rows = self.db.execute(
            SparqlClause(clause % (idea_ref.n3(),),
                         quad_storage=storage.n3()))
        # Keep only the last path segment of the author URI as the profile id.
        ranked = sorted(
            ((int(linked_posts), int(posts),
              'local:AgentProfile/' + author.rsplit('/', 1)[1])
             for (linked_posts, posts, author) in rows),
            reverse=True)
        return [profile for (_, _, profile) in ranked]
Exemplo n.º 33
0
    def handle_GET(self, request, context):
        """Render the citation network around the resource given by ``?uri=``.

        Query parameters read from ``request.GET``:

        * ``uri`` -- the resource; ``Http404`` is raised when ``get_types``
          reports no types for it.
        * ``depth`` -- clamped to 0..3; 2 when missing or not an integer.
        * ``direction`` -- key into ``_DIRECTIONS``; ``'both'`` when missing
          or unknown.
        * ``layout`` and ``minimal`` -- passed through to the context.
        """
        params = request.GET
        uri = URIRef(params.get('uri', ''))
        if not self.get_types(uri):
            raise Http404

        try:
            requested_depth = int(params.get('depth'))
        except (TypeError, ValueError):
            depth = 2
        else:
            depth = max(0, min(requested_depth, 3))

        try:
            direction_template = self._DIRECTIONS[params['direction']]
        except KeyError:
            direction_template = self._DIRECTIONS['both']
        direction = direction_template % {'depth': depth}

        graph = self.endpoint.query(
            self._QUERY % {'uri': uri.n3(), 'direction': direction})

        # One Resource per distinct subject that has an rdf:type in the result.
        typed_subjects = set(graph.subjects(NS.rdf.type))
        subjects = [Resource(node, graph, self.endpoint)
                    for node in typed_subjects]
        hexhashes = {resource.hexhash for resource in subjects}

        context.update({
            'graph': graph,
            'queries': [graph.query],
            'subjects': subjects,
            'subject': Resource(uri, graph, self.endpoint),
            'hexhashes': hexhashes,
            'depth': depth,
            'layout': params.get('layout'),
            'direction': params.get('direction'),
            'minimal': params.get('minimal'),
        })

        return self.render(request, context, 'citation-network')
Exemplo n.º 34
0
    def change_value(self, entity_uri, property_uri, index, value):
        """Replace the value stored at ``index`` for an entity's property.

        The previous value recorded in the ``knowledge`` table is removed
        from the graph. The new value is then checked with
        ``check_property_value``:

        * if the check reports a problem, the in-memory entry at ``index`` is
          deleted, the database row is set to an empty literal and the
          check's message is returned;
        * otherwise the value is added to the graph, stored in
          ``self.properties`` and written to the database, and nothing is
          returned.

        In both cases ``self.root`` is reset.
        """
        # Exact ``str`` only: wrap plain strings, leave everything else as is.
        if type(entity_uri) is str:
            entity_uri = URIRef(entity_uri)
        if type(property_uri) is str:
            property_uri = URIRef(property_uri)

        row_filter = ('   WHERE subgraph_id = ? AND subject = ? '
                      'AND predicate = ? AND property_index = ?')
        subject_n3 = entity_uri.n3()
        predicate_n3 = property_uri.n3()
        slot = (entity_uri, property_uri)

        db = get_db()

        # delete previous value from graph
        previous = db.execute(
            'SELECT object FROM knowledge ' + row_filter,
            (self.id, subject_n3, predicate_n3, index)).fetchone()
        if previous:
            self.graph.remove(
                (entity_uri, property_uri, parse_n3_term(previous['object'])))

        # add new value to graph
        error, value = self.check_property_value(property_uri, value)
        if error:
            # the check returned an error message
            del self.properties[slot][index]
            stored = Literal('')
        else:
            self.graph.add((entity_uri, property_uri, value))
            self.properties[slot][index] = value
            stored = value

        db.execute(
            'UPDATE knowledge SET object = ? ' + row_filter,
            (stored.n3(), self.id, subject_n3, predicate_n3, index))
        db.commit()

        self.root = None  # forces a rebuild of the root entity
        if error:
            return error
Exemplo n.º 35
0
class ContentProvider(object):
    """Content source for one harvester, queried through ``JenaHelper``."""

    #implements(IContentProvider)

    def __init__(self, config):
        """Read the harvester description and cache its source metadata.

        ``config`` must provide ``harvesteruri``; ``originatingsource`` and
        ``groupdescription`` are read only when the harvester's description
        does not supply the corresponding value.
        """
        self.config = config
        self.jena = JenaHelper(config)
        self.harvester = URIRef(config['harvesteruri'])
        description = self.jena.graphquery(
            "DESCRIBE %s" % self.harvester.n3())

        source = description.value(self.harvester, ANDS.originatingSource)
        self.originatingSource = source or Literal(config['originatingsource'])
        group = description.value(self.harvester, ANDS.groupDescription)
        self.groupDescription = group or Literal(config['groupdescription'])
        self.item_sparql_query = resource_string(__name__,
                                                 "item_sparql.sparql")

    def set_logger(self, log):
        """Set the logger instance for this class
        """
        self.log = log

    def update(self, from_date=None):
        """Harvests new content added since from_date
        returns a list of content_ids that were changed/added,
        this should be called before get_contents is called
        """
        # NOTE(review): ``from_date`` is not used by the query below.
        template = resource_string(__name__, "items_to_harvest.sparql")
        bindings = {'harvester': self.harvester}
        self._content = self.jena.selectquery(template % bindings)

        return self._content

    def count(self):
        """Returns number of content objects in the repository
        returns None if number is unknown, this should not be
        called before update is called
        """
        return len(self._content)

    def get_content_ids(self):
        """returns a list/generator of content_ids
        """
        return self._content

    def get_content_by_id(self, id):
        """Return content of a specific id
        """
        # assume id is URIRef instance
        subject = URIRef(id)
        target = Graph(identifier=subject)
        bindings = {
            "subject": id,
            'harvester': self.harvester
        }
        data = self.jena.graphquery(self.item_sparql_query % bindings, target)
        # FIXME: make these conditional
        source_metadata = (
            (ANDS.originatingSource, self.originatingSource),
            (ANDS.groupDescription, self.groupDescription),
        )
        for predicate, obj in source_metadata:
            data.add((subject, predicate, obj))
        return data
Exemplo n.º 36
0
 def S(self, uri):
     """Return the N3 form of ``uri`` using this object's namespace manager.

     A plain string is wrapped in ``URIRef`` first; any other value is
     expected to provide ``n3`` itself.
     """
     node = URIRef(uri) if isinstance(uri, str) else uri
     return node.n3(self.g.g.namespace_manager)
Exemplo n.º 37
0
# the Free Software Foundation; either version 3, or (at your option)
# any later version.

# This file is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with n3; see the file LICENSE.  If not, write to
# the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
# Boston, MA 02110-1301 USA,

# Serializing a single term to N3

from rdflib import Graph, URIRef, Literal, BNode
from rdflib.namespace import FOAF, NamespaceManager

# Print a URI term in N3 form without any namespace information.
person = URIRef('http://xmlns.com/foaf/0.1/Person')
print(person.n3())

g = Graph()
# bind() is called for its effect on the graph's namespace manager; it has no
# useful return value, so printing it only emitted "None".
g.bind("foaf", FOAF)

# Same term again, now rendered with the graph's namespace manager.
print(person.n3(g.namespace_manager))

# Repeat both renderings for a literal.
l = Literal(2)
print(l.n3())

print(l.n3(g.namespace_manager))
Exemplo n.º 38
0
def fix_socrata_graph(g, dataset_dict, portal_url):
    """Rewrite a Socrata-derived graph in place and return its dataset node.

    When ``dataset_dict['view']`` is a dict, three best-effort passes run,
    each in its own ``try`` so a failure in one does not stop the others:

    1. the triples of the node carrying the view id as ``DCT.identifier``
       are moved onto a URI built from ``portal_url`` and that id, and
       publisher / contact nodes are added from ``owner`` / ``tableAuthor``;
    2. every ``DCAT.distribution`` object is replaced by a hash-named blank
       node typed ``DCAT.Distribution``;
    3. the two ``open-data-standards`` date predicates are renamed to
       ``DCT.issued`` / ``DCT.modified``.

    Blank-node ids are SHA-1 digests of the UTF-8 encoded id text, matching
    the other graph builders in this module. Every loop that edits ``g``
    iterates over a snapshot of the matching triples, because the graph
    must not change underneath its own ``triples()`` generator.

    :param g: graph to modify in place.
    :param dataset_dict: Socrata metadata; only the ``view`` entry is read.
    :param portal_url: portal base URL; trailing slashes are ignored.
    :return: the dataset URI, or, when none could be built, the result of
        looking up a node typed ``DCAT.Dataset`` in ``g``.
    """
    dataset_ref = None
    # add additional info
    if 'view' in dataset_dict and isinstance(dataset_dict['view'], dict):
        data = dataset_dict['view']
        try:
            identifier = data['id']
            uri = '{0}/dataset/{1}'.format(portal_url.rstrip('/'), identifier)
            dataset_ref = URIRef(uri)
            # replace blank node by dataset reference
            dataset_node = g.value(predicate=DCT.identifier, object=Literal(identifier))
            if dataset_node:
                for s, p, o in list(g.triples((dataset_node, None, None))):
                    g.remove((s, p, o))
                    g.add((dataset_ref, p, o))
            if (dataset_ref, RDF.type, DCAT.Dataset) not in g:
                g.add((dataset_ref, RDF.type, DCAT.Dataset))

            # owner
            if 'owner' in data and isinstance(data['owner'], dict) and 'displayName' in data['owner']:
                owner = data['owner']['displayName']
                # add owner as publisher
                # BNode: dataset_ref + DCT.publisher + owner
                id_string = dataset_ref.n3() + DCT.publisher.n3() + owner
                bnode_hash = hashlib.sha1(id_string.encode('utf-8'))
                publisher_details = BNode(bnode_hash.hexdigest())

                g.add((publisher_details, RDF.type, FOAF.Organization))
                g.add((dataset_ref, DCT.publisher, publisher_details))
                g.add((publisher_details, FOAF.name, Literal(owner)))
            # author
            if 'tableAuthor' in data and isinstance(data['tableAuthor'], dict) and 'displayName' in data['tableAuthor']:
                author = data['tableAuthor']['displayName']
                # BNode: dataset_ref + VCARD.fn + author
                id_string = dataset_ref.n3() + VCARD.fn.n3() + author
                bnode_hash = hashlib.sha1(id_string.encode('utf-8'))
                contact_details = BNode(bnode_hash.hexdigest())

                g.add((contact_details, RDF.type, VCARD.Organization))
                g.add((dataset_ref, DCAT.contactPoint, contact_details))
                g.add((contact_details, VCARD.fn, Literal(author)))

            #title
            #if 'name' in data:

            #    g.add((dataset_ref, DCT.title, Literal('name')))

        except Exception:
            # Best effort: leave the graph as far as it got.
            pass
        try:
            # redesign distribution, format
            for ds, has_distr, dcat_download in list(g.triples((None, DCAT.distribution, None))):

                # create new distr
                # BNode: dataset_ref + dcat_download
                id_string = dataset_ref.n3() + dcat_download
                bnode_hash = hashlib.sha1(id_string.encode('utf-8'))
                distribution = BNode(bnode_hash.hexdigest())

                # rewrite format
                for s, p, format_bnode in list(g.triples((dcat_download, DCT['format'], None))):
                    format = g.value(format_bnode, RDFS.label)
                    mime_type = g.value(format_bnode, RDF.value)
                    # keep the blank node and add type MediaTypeOrExtent
                    #g.remove((format_bnode, None, None))
                    #g.add((distribution, DCT['format'], format))
                    g.add((format, RDF.type, DCT.MediaTypeOrExtent))
                    # additionally add media type to distribution
                    g.add((distribution, DCAT.mediaType, mime_type))
                    g.remove((s, p, format_bnode))

                # add new distr
                g.add((ds, DCAT.distribution, distribution))
                g.add((distribution, RDF.type, DCAT.Distribution))
                # remove old dcat:Download
                g.remove((ds, has_distr, dcat_download))
                g.remove((dcat_download, RDF.type, None))
                # add links from old distribution
                for s, p, o in list(g.triples((dcat_download, None, None))):
                    g.remove((s, p, o))
                    g.add((distribution, p, o))

        except Exception:
            # Best effort: also reached when no dataset URI could be built.
            pass

        try:
            # created, modified keys
            ODS_created = URIRef('http://open-data-standards.github.com/2012/01/open-data-standards#created')
            ODS_modified = URIRef('http://open-data-standards.github.com/2012/01/open-data-standards#last_modified')
            for s, p, o in list(g.triples((None, ODS_created, None))):
                g.remove((s, p, o))
                g.add((s, DCT.issued, o))
            for s, p, o in list(g.triples((None, ODS_modified, None))):
                g.remove((s, p, o))
                g.add((s, DCT.modified, o))

        except Exception:
            # Best effort: date predicates are left untouched on failure.
            pass
    if not dataset_ref:
        dataset_ref = g.value(predicate=RDF.type, object=DCAT.Dataset)
    return dataset_ref
Exemplo n.º 39
0
def graph_from_data_gouv_fr(g, dataset_dict, portal_url):
    """Add triples describing a data.gouv.fr dataset to ``g``.

    The dataset subject is the ``page`` URL from ``dataset_dict``. Title,
    description, landing page, frequency, tags, dates, publisher, license
    and one distribution per entry in ``resources`` are added.

    Blank-node ids are SHA-1 digests of UTF-8 encoded id text. The publisher
    node is named after the organization's page when there is one; otherwise
    it is a blank node hashed from the organization id, falling back to its
    name when the id is missing. The publisher identifier triple is added
    only when an id is present.

    :param g: graph to add to.
    :param dataset_dict: dataset metadata; ``id`` and ``page`` are required.
    :param portal_url: not used by this function.
    :return: the dataset ``URIRef``.
    """
    identifier = dataset_dict['id']
    uri = dataset_dict['page']

    # dataset subject
    dataset_ref = URIRef(uri)
    for prefix, namespace in namespaces.iteritems():
        g.bind(prefix, namespace)

    g.add((dataset_ref, RDF.type, DCAT.Dataset))

    # identifier
    g.add((dataset_ref, DCT.identifier, Literal(identifier)))
    # Basic fields
    items = [
        ('title', DCT.title, None),
        ('description', DCT.description, None),
        ('page', DCAT.landingPage, None),
        ('frequency', DCT.accrualPeriodicity, None),
    ]
    _add_triples_from_dict(g, dataset_dict, dataset_ref, items)

    # Tags
    for tag in dataset_dict.get('tags', []):
        if isinstance(tag, basestring):
            g.add((dataset_ref, DCAT.keyword, Literal(tag)))

    # Dates
    items = [
        ('created_at', DCT.issued, None),
        ('last_modified', DCT.modified, ['last_update'])
    ]
    _add_date_triples_from_dict(g, dataset_dict, dataset_ref, items)

    # publisher
    publisher = dataset_dict.get('organization')
    if publisher and isinstance(publisher, dict):
        publisher_id = publisher.get('id')
        publisher_name = publisher.get('name')
        publisher_page = publisher.get('page')
        if publisher_page:
            publisher_details = URIRef(publisher_page)
            g.add((publisher_details, FOAF.homepage, URIRef(publisher_page)))
        else:
            # BNode: dataset_ref + DCT.publisher + publisher id (or name)
            publisher_key = publisher_id or publisher_name or ''
            id_string = dataset_ref.n3() + DCT.publisher.n3() + publisher_key
            bnode_hash = hashlib.sha1(id_string.encode('utf-8'))
            publisher_details = BNode(bnode_hash.hexdigest())

        g.add((publisher_details, RDF.type, FOAF.Organization))
        if publisher_id is not None:
            g.add((publisher_details, DCT.identifier, Literal(publisher_id)))
        g.add((dataset_ref, DCT.publisher, publisher_details))
        if publisher_name:
            g.add((publisher_details, FOAF.name, Literal(publisher_name)))

    license = None
    license_id = dataset_dict.get('license')
    if license_id:
        id_string = dataset_ref.n3() + DCT.license.n3() + license_id
        bnode_hash = hashlib.sha1(id_string.encode('utf-8'))
        license = BNode(bnode_hash.hexdigest())
        g.add((license, RDF.type, DCT.LicenseDocument))
        g.add((license, DCT.identifier, Literal(license_id)))

    # Resources
    for resource_dict in dataset_dict.get('resources', []):
        distribution = URIRef(resource_dict['id'])

        g.add((dataset_ref, DCAT.distribution, distribution))
        g.add((distribution, RDF.type, DCAT.Distribution))

        # License
        if license:
            g.add((distribution, DCT.license, license))

        # Simple values
        items = [
            ('title', DCT.title, None),
            ('description', DCT.description, None),
            ('created_at', DCT.issued, None),
            ('last_modified', DCT.modified, None)
        ]
        _add_triples_from_dict(g, resource_dict, distribution, items)

        if resource_dict.get('format'):
            id_string = dataset_ref.n3() + DCT['format'].n3() + resource_dict['format']
            bnode_hash = hashlib.sha1(id_string.encode('utf-8'))
            f = BNode(bnode_hash.hexdigest())

            g.add((f, RDF.type, DCT.MediaTypeOrExtent))
            g.add((f, RDFS.label, Literal(resource_dict['format'])))
            g.add((distribution, DCT['format'], f))
            if resource_dict.get('mime'):
                g.add((f, RDF.value, Literal(resource_dict['mime'])))

        if resource_dict.get('mime'):
            g.add((distribution, DCAT.mediaType,
                   Literal(resource_dict['mime'])))

        download_url = resource_dict.get('url')
        if download_url:
            download_url = download_url.strip()
            if is_valid_uri(download_url):
                g.add((distribution, DCAT.downloadURL, URIRef(download_url)))
            else:
                g.add((distribution, DCAT.downloadURL, Literal(download_url)))

        if resource_dict.get('filesize'):
            # Fall back to the raw value when it cannot be read as a number.
            try:
                g.add((distribution, DCAT.byteSize,
                       Literal(float(resource_dict['filesize']),
                               datatype=XSD.decimal)))
            except (ValueError, TypeError):
                g.add((distribution, DCAT.byteSize,
                       Literal(resource_dict['filesize'])))
    return dataset_ref
Exemplo n.º 40
0
def graph_from_opendatasoft(g, dataset_dict, portal_url):
    """Add triples describing an OpenDataSoft dataset to ``g``.

    The dataset subject is ``<portal_url>/explore/dataset/<datasetid>``.
    Descriptive fields are read from ``dataset_dict['metas']``. When the
    dataset has records, one distribution per export format is added; each
    entry in ``attachments`` becomes a further distribution.

    Blank-node ids are SHA-1 digests of UTF-8 encoded id text. An attachment
    node is hashed from its download URL, or from its title when it has no
    ``id``. Attachment media type and access URL use the DCAT terms, as the
    export distributions do.

    :param g: graph to add to.
    :param dataset_dict: dataset metadata; ``datasetid`` and ``metas`` are
        required.
    :param portal_url: portal base URL; trailing slashes are ignored.
    :return: the dataset ``URIRef``.
    """
    # available: title, description, language, theme, keyword, license, publisher, references
    # additional: created, issued, creator, contributor, accrual periodicity, spatial, temporal, granularity, data quality

    identifier = dataset_dict['datasetid']
    uri = '{0}/explore/dataset/{1}'.format(portal_url.rstrip('/'), identifier)

    # dataset subject
    dataset_ref = URIRef(uri)
    for prefix, namespace in namespaces.iteritems():
        g.bind(prefix, namespace)

    g.add((dataset_ref, RDF.type, DCAT.Dataset))

    # identifier
    g.add((dataset_ref, DCT.identifier, Literal(identifier)))
    data = dataset_dict['metas']
    # Basic fields
    items = [
        ('title', DCT.title, None),
        ('description', DCT.description, None),
    ]
    _add_triples_from_dict(g, data, dataset_ref, items)

    #  Lists
    items = [
        ('language', DCT.language, None),
        ('theme', DCAT.theme, None),
        ('keyword', DCAT.keyword, None),
    ]
    _add_list_triples_from_dict(g, data, dataset_ref, items)

    # publisher
    publisher_name = data.get('publisher')
    if publisher_name:
        # BNode: dataset_ref + DCT.publisher + publisher_name
        id_string = dataset_ref.n3() + DCT.publisher.n3() + publisher_name
        bnode_hash = hashlib.sha1(id_string.encode('utf-8'))
        publisher_details = BNode(bnode_hash.hexdigest())

        g.add((publisher_details, RDF.type, FOAF.Organization))
        g.add((dataset_ref, DCT.publisher, publisher_details))
        g.add((publisher_details, FOAF.name, Literal(publisher_name)))
        # TODO any additional publisher information available? look for fields

    # Dates
    items = [
        #('metadata_processed', DCT.issued, ['metadata_created']),
        ('modified', DCT.modified, ['metadata_processed', 'metadata_modified']),
    ]
    _add_date_triples_from_dict(g, data, dataset_ref, items)

    # references
    references = data.get('references')
    if references and isinstance(references, basestring) and bool(urlparse.urlparse(references).netloc):
        references = references.strip()
        if is_valid_uri(references):
            g.add((dataset_ref, RDFS.seeAlso, URIRef(references)))
        else:
            g.add((dataset_ref, RDFS.seeAlso, Literal(references)))

    # store licenses for distributions
    license = data.get('license')

    # distributions
    if dataset_dict.get('has_records'):
        exports = [('csv', 'text/csv'), ('json', 'application/json'), ('xls', 'application/vnd.ms-excel')]
        if 'geo' in dataset_dict.get('features', []):
            exports.append(('geojson', 'application/vnd.geo+json'))
            exports.append(('kml', 'application/vnd.google-earth.kml+xml'))
            # TODO shape files?
            # exports.append(('shp', 'application/octet-stream'))
        for format, mimetype in exports:
            # URL
            url = portal_url.rstrip('/') + '/api/records/1.0/download?dataset=' + identifier + '&format=' + format

            # BNode: dataset_ref + url
            id_string = dataset_ref.n3() + url
            bnode_hash = hashlib.sha1(id_string.encode('utf-8'))
            distribution = BNode(bnode_hash.hexdigest())

            g.add((dataset_ref, DCAT.distribution, distribution))
            g.add((distribution, RDF.type, DCAT.Distribution))

            if is_valid_uri(url):
                g.add((distribution, DCAT.accessURL, URIRef(url)))
            else:
                g.add((distribution, DCAT.accessURL, Literal(url)))

            # License
            if license:
                # BNode: distribution + license
                id_string = distribution.n3() + license
                bnode_hash = hashlib.sha1(id_string.encode('utf-8'))
                l = BNode(bnode_hash.hexdigest())

                g.add((distribution, DCT.license, l))
                g.add((l, RDF.type, DCT.LicenseDocument))
                g.add((l, RDFS.label, Literal(license)))

            # Format
            # BNode: distribution + format + mimetype
            id_string = distribution.n3() + format + mimetype
            bnode_hash = hashlib.sha1(id_string.encode('utf-8'))
            f = BNode(bnode_hash.hexdigest())

            g.add((distribution, DCT['format'], f))
            g.add((f, RDF.type, DCT.MediaTypeOrExtent))
            g.add((f, RDFS.label, Literal(format)))
            g.add((f, RDF.value, Literal(mimetype)))
            g.add((distribution, DCAT.mediaType, Literal(mimetype)))


            # Dates
            items = [
                #('issued', DCT.issued, None),
                ('data_processed', DCT.modified, None),
            ]
            _add_date_triples_from_dict(g, data, distribution, items)

    # attachments
    for attachment in dataset_dict.get('attachments', []):
        # URL (only known when the attachment carries an id)
        attachment_id = attachment.get('id')
        url = None
        if attachment_id:
            url = portal_url.rstrip('/') + '/api/datasets/1.0/' + identifier + '/attachments/' + attachment_id

        # BNode: dataset_ref + url. The attachment itself is a dict and
        # cannot be concatenated into the id text, so the URL (or, without
        # an id, the title) identifies the node.
        id_string = dataset_ref.n3() + (url or attachment.get('title') or '')
        bnode_hash = hashlib.sha1(id_string.encode('utf-8'))
        distribution = BNode(bnode_hash.hexdigest())

        g.add((dataset_ref, DCAT.distribution, distribution))
        g.add((distribution, RDF.type, DCAT.Distribution))
        if license:
            # BNode: distribution + license
            id_string = distribution.n3() + license
            bnode_hash = hashlib.sha1(id_string.encode('utf-8'))
            l = BNode(bnode_hash.hexdigest())

            g.add((distribution, DCT.license, l))
            g.add((l, RDF.type, DCT.LicenseDocument))
            g.add((l, RDFS.label, Literal(license)))

        #  Simple values
        items = [
            ('title', DCT.title, None),
            ('mimetype', DCAT.mediaType, None),
            ('format', DCT['format'], None),
        ]
        _add_triples_from_dict(g, attachment, distribution, items)

        if url:
            g.add((distribution, DCAT.accessURL, Literal(url)))
    return dataset_ref
Exemplo n.º 41
0
class Context(GraphOperations):
    """A named set of statements that belongs to a parent store.

    When the parent is buffered, statements are staged in a local graph and
    sent to the parent's graph by ``save``; otherwise the parent's own
    context graph for this identifier is used directly.
    """

    def __init__(self, parent, identifier=None, meta=None):
        self.parent = parent
        # Without an explicit identifier a fresh blank-node id is used.
        self.identifier = URIRef(BNode() if identifier is None else identifier)
        self.meta = MetaData(self, meta)
        self.meta.generate()

    @property
    def graph(self):
        # Created on first access, and again after ``flush`` reset it.
        if getattr(self, '_graph', None) is None:
            if self.parent.buffered:
                self._graph = Graph(identifier=self.identifier)
            else:
                self._graph = self.parent.graph.get_context(self.identifier)
        return self._graph

    def add(self, schema, data):
        """ Stage ``data`` as a set of statements, based on the given
        ``schema`` definition. """
        uri, triples = triplify(self.get_binding(schema, data))
        for statement in triples:
            self.graph.add(statement)
        return uri

    def save(self):
        """ Transfer the statements in this context over to the main store. """
        if not self.parent.buffered:
            self.meta.generate()
            return
        template = """
                INSERT DATA { GRAPH %s { %s } }
            """
        update = template % (self.identifier.n3(),
                             self.graph.serialize(format='nt'))
        self.parent.graph.update(update)
        self.flush()

    def delete(self):
        """ Delete all statements matching the current context identifier
        from the main store. """
        if not self.parent.buffered:
            self.graph.remove((None, None, None))
            return
        self.parent.graph.update(
            'CLEAR SILENT GRAPH %s ;' % self.identifier.n3())
        self.flush()

    def flush(self):
        """ Clear all the pending statements in the local context, without
        transferring them to the main store. """
        self._graph = None

    def __str__(self):
        return self.identifier

    def __repr__(self):
        return '<Context("%s")>' % self.identifier
Exemplo n.º 42
0
def getShortUri(graph, uri):
    """Return the N3 form of ``uri`` using the graph's namespace manager.

    :param graph: graph whose ``namespace_manager`` is passed to ``n3``.
    :param uri: a ``URIRef``, or any value accepted by ``URIRef(...)``.
    """
    # Exact-type check: anything that is not precisely a URIRef is wrapped.
    ref = uri if type(uri) is URIRef else URIRef(uri)
    return ref.n3(graph.namespace_manager)
Exemplo n.º 43
0
    def get_idea_ids_showing_post(cls, post_id, direct=False, indirect=True):
        """Given a post, give the ID of the ideas that show this message.

        :param post_id: id of the post; turned into a URI with
            ``Content.uri_generic``.
        :param direct: select ideas linked to the post's reply ancestors.
        :param indirect: select ideas that include such a linked idea.
        :return: when both flags are set, a list of
            ``(idea uri, linked idea uri, post uri, extract uri or None)``
            tuples.

        NOTE(review): when only one flag is set (including the defaults), the
        visible code builds ``clause`` but never executes it and falls off
        the end, returning None -- confirm whether code is missing here.
        """
        # This works because of a virtuoso bug...
        # where DISTINCT gives IDs instead of URIs.
        from .generic import Content
        from .idea_content_link import Extract
        assert direct or indirect
        discussion_storage = \
            AssemblQuadStorageManager.discussion_storage_name()

        post_uri = URIRef(Content.uri_generic(
            post_id, AssemblQuadStorageManager.local_uri()))
        if indirect and not direct:
            clause = '''select distinct ?idea where {
                %s sioc:reply_of* ?post .
                ?post assembl:postLinkedToIdea ?ideaP .
                ?idea idea:includes* ?ideaP }'''
        elif direct and not indirect:
            clause = '''select distinct ?idea where {
                %s sioc:reply_of* ?post .
                ?post assembl:postLinkedToIdea ?idea }'''
        if direct and indirect:
            # Not used anymore, to be cleaned.
            clause = '''select distinct ?postP, ?ideaP, ?idea, ?ex where {
                %s sioc:reply_of* ?postP .
                ?postP assembl:postLinkedToIdea ?ideaP  .
                ?idea idea:includes* ?ideaP .
                optional { ?ex oa:hasSource ?postP ;
                    assembl:resourceExpressesIdea ?ideaP . } }'''
            r = list(cls.default_db.execute(
                SparqlClause(clause % (
                    post_uri.n3(),),
                    quad_storage=discussion_storage.n3())))
            # Rows become (post id, linked idea id, idea id, extract id or None).
            r = [(int(x), int(y), int(z), int(e) if e else None)
                 for (x, y, z, e) in r]

            # Python 2 only: tuple parameters and ``sort(cmp=...)``.
            # Ordering: rows whose idea is the linked idea first, then rows
            # for the requested post itself, then descending post, linked
            # idea, idea and extract ids.
            # NOTE(review): ``e2 - e1`` would fail if exactly one of the two
            # extract ids is None -- confirm whether that can happen.
            def comp((pp1, ip1, i1, e1), (pp2, ip2, i2, e2)):
                direct_idea1 = ip1 == i1
                direct_idea2 = ip2 == i2
                direct_post1 = pp1 == post_id
                direct_post2 = pp2 == post_id
                if direct_idea1 != direct_idea2:
                    return -1 if direct_idea1 else 1
                if direct_post1 != direct_post2:
                    return -1 if direct_post1 else 1
                if pp1 != pp2:
                    # assume hry is congruent with post order.
                    return pp2 - pp1
                if ip1 != ip2:
                    # TODO: Real hry order. Should be rare.
                    return ip2 - ip1
                if i1 != i2:
                    # TODO: Real hry order.
                    return i2 - i1
                if e1 != e2:
                    return e2 - e1
                return 0
            r.sort(cmp=comp)
            # can't trust virtuoso's uniqueness.
            # After sorting, equal rows are adjacent, so groupby drops them.
            r = [e for e, _ in groupby(r)]
            # ``Idea`` is not imported in this function; presumably it is a
            # module-level name -- verify.
            return [(
                Idea.uri_generic(i),
                Idea.uri_generic(ip),
                Content.uri_generic(pp),
                Extract.uri_generic(ex) if ex else None
            ) for (pp, ip, i, ex) in r]
Exemplo n.º 44
0
def extract_task_data_each_query(rdf_graph, task: URIRef) -> sparql.Result:
    """Run the per-query task-data template for ``task`` against the graph.

    The N3 form of ``task`` is substituted for ``$task`` in
    ``task_data_each_query_template``. The result is asserted to be
    non-empty before it is returned.
    """
    query_text = Template(task_data_each_query_template).substitute(
        task=task.n3())
    rows: sparql.Result = rdf_graph.query(query_text)
    assert len(rows) > 0
    return rows
Exemplo n.º 45
0
def extract_task_meta_data(rdf_graph: Graph, task: URIRef) -> sparql.ResultRow:
    """Run the task meta-data template for ``task`` and return its only row.

    The N3 form of ``task`` is substituted for ``$task`` in
    ``task_meta_data_template``. The result is asserted to contain exactly
    one row.
    """
    query_text = Template(task_meta_data_template).substitute(task=task.n3())
    rows: sparql.Result = rdf_graph.query(query_text)
    assert len(rows) == 1
    return next(iter(rows))