Example #1
0
 def format(self, value):
     """Serialize *value* as an ISO 8601 string via utils.isoformat."""
     serialized = utils.isoformat(value)
     return serialized
Example #2
0
def timestamp(ts):
    """Wrap *ts* as an RDF literal typed xsd:dateTime."""
    formatted = isoformat(ts)
    return Literal(formatted, datatype=XSD.dateTime)
Example #3
0
def describe_dataset(data, created_at):
    """Build and serialize (as Turtle) a VoID description of *data*.

    Starts from the void-stub.ttl template shipped alongside this module
    and augments it with: entity counts for each declared class
    partition, triple counts for each declared linkset, the modification
    time (*created_at*), a provenance link, the dataset's total triple
    count, and the contributors recorded in the patch_request table.
    """
    cursor = database.get_db().cursor()
    # id > 1 skips the first patch — presumably the initial data load,
    # which has no human contributor (see "initial-data-loader" elsewhere).
    contributors = cursor.execute('''
    SELECT DISTINCT created_by, updated_by
    FROM patch_request
    WHERE merged = 1
    AND id > 1''').fetchall()

    with open(os.path.join(os.path.dirname(__file__), 'void-stub.ttl')) as f:
        void_g = Graph().parse(file=f, format='turtle')
    # The stub names its own DatasetDescription subject; its URI doubles
    # as the namespace for everything described here.
    ns = Namespace(void_g.value(
        predicate=RDF.type, object=VOID.DatasetDescription))
    data_g = Graph().parse(data=json.dumps(data), format='json-ld')

    # Fill in an entity count for every declared class partition.
    for partition in void_g.objects(subject=ns.d,
                                    predicate=VOID.classPartition):
        klass = void_g.value(subject=partition, predicate=VOID['class'])
        entity_count = len(data_g.query('''
        SELECT DISTINCT ?s
        WHERE {
          ?s a <%s> .
          FILTER (STRSTARTS(STR(?s), "%s"))
        }''' % (klass, ns)))
        void_g.add(
            (partition, VOID.entities,
             Literal(entity_count, datatype=XSD.integer)))

    # Fill in a triple count for every declared linkset.
    for linkset in void_g.subjects(predicate=RDF.type, object=VOID.Linkset):
        target = void_g.value(subject=linkset, predicate=VOID.objectsTarget)
        link_predicate = void_g.value(subject=linkset,
                                      predicate=VOID.linkPredicate)
        uri_space = void_g.value(subject=target,
                                 predicate=VOID.uriSpace).value
        triples = len(data_g.query('''
SELECT ?s ?p ?o
WHERE {
  ?s <%s> ?o .
  FILTER (STRSTARTS(STR(?o), "%s")) .
}''' % (link_predicate, uri_space)))
        void_g.add(
            (linkset, VOID.triples, Literal(triples, datatype=XSD.integer)))

    # Dataset-level metadata on the description subject itself.
    void_g.add((ns.d, DCTERMS.modified,
                Literal(utils.isoformat(created_at), datatype=XSD.dateTime)))
    void_g.add((ns.d, DCTERMS.provenance,
                URIRef(utils.absolute_url(data['@context']['@base'], 'history')
                       + '#changes')))
    void_g.add((ns.d, VOID.triples,
                Literal(len(data_g), datatype=XSD.integer)))

    for row in contributors:
        void_g.add((ns.d, DCTERMS.contributor, URIRef(row['created_by'])))
        if row['updated_by']:
            void_g.add((ns.d, DCTERMS.contributor, URIRef(row['updated_by'])))

    return void_g.serialize(format='turtle')
Example #4
0
def history():
    """Return the PROV change history of all merged patches as JSON-LD.

    Each merged patch_request row becomes a #change-N activity linked to
    the dataset versions it used and generated, the entity versions it
    created/updated/invalidated, and qualified associations for the
    agents who submitted, updated, and merged it.
    """
    g = Graph()
    changelog = Collection(g, URIRef("#changelog"))
    cursor = database.get_db().cursor()
    rows = cursor.execute(
        """
SELECT
  id,
  created_at,
  created_by,
  updated_by,
  merged_at,
  merged_by,
  applied_to,
  resulted_in,
  created_entities,
  updated_entities,
  removed_entities
FROM patch_request
WHERE merged = 1
ORDER BY id ASC
"""
    ).fetchall()
    for row in rows:
        change = URIRef(f"#change-{row['id']}")
        patch = URIRef(f"#patch-{row['id']}")
        g.add((patch, FOAF.page,
               PERIODO[identifier.prefix(url_for("patch", id=row["id"]))]))
        g.add((change, PROV.startedAtTime,
               Literal(utils.isoformat(row["created_at"]),
                       datatype=XSD.dateTime)))
        g.add((change, PROV.endedAtTime,
               Literal(utils.isoformat(row["merged_at"]),
                       datatype=XSD.dateTime)))
        # Both the input and output dataset versions specialize the
        # abstract (unversioned) dataset.
        dataset = PERIODO[identifier.prefix(url_for("abstract_dataset"))]
        version_in = PERIODO[identifier.prefix(
            url_for("abstract_dataset", version=row["applied_to"]))]
        version_out = PERIODO[identifier.prefix(
            url_for("abstract_dataset", version=row["resulted_in"]))]
        g.add((version_in, PROV.specializationOf, dataset))
        g.add((version_out, PROV.specializationOf, dataset))

        g.add((change, PROV.used, version_in))
        g.add((change, PROV.used, patch))
        g.add((change, PROV.generated, version_out))

        def record_entity_version(entity_id):
            # One versioned entity URI per touched entity, generated by
            # this change and specializing the unversioned entity.
            entity = PERIODO[entity_id]
            versioned = PERIODO[
                entity_id + "?version={}".format(row["resulted_in"])]
            g.add((versioned, PROV.specializationOf, entity))
            g.add((change, PROV.generated, versioned))
            return versioned

        for entity_id in json.loads(row["created_entities"]):
            record_entity_version(entity_id)

        for entity_id in json.loads(row["updated_entities"]):
            new_version = record_entity_version(entity_id)
            old_version = PERIODO[
                entity_id + "?version={}".format(row["applied_to"])]
            g.add((new_version, PROV.wasRevisionOf, old_version))

        for entity_id in json.loads(row["removed_entities"]):
            g.add((change, PROV.invalidated, PERIODO[entity_id]))

        roles = (("created_by", "submitted"),
                 ("updated_by", "updated"),
                 ("merged_by", "merged"))
        for field, term in roles:
            # The automated initial loader gets no qualified association.
            if row[field] == "initial-data-loader":
                continue
            agent = URIRef(row[field])
            association = URIRef("#patch-{}-{}".format(row["id"], term))
            g.add((change, PROV.wasAssociatedWith, agent))
            g.add((change, PROV.qualifiedAssociation, association))
            g.add((association, PROV.agent, agent))
            g.add((association, PROV.hadRole,
                   PERIODO[identifier.prefix(url_for("vocab") + "#" + term)]))

        changelog.append(change)

    def ordering(node):
        # " " sorts before any URI, so the changelog comes first.
        return " " if node["@id"] == "#changelog" else node["@id"]

    jsonld = json.loads(
        g.serialize(format="json-ld", context=CONTEXT).decode("utf-8"))
    jsonld["history"] = sorted(jsonld["history"], key=ordering)
    return json.dumps(jsonld, sort_keys=True)