示例#1
0
def buildhhear(ctx):
    """Run the HHEAR ontology SETL build.

    Loads the SETL plan, registers every fragment ``.ttl`` file under
    HHEAR_DIR as an Extract used by each ontology output file, then
    executes the plan with setlr.
    """
    setl_graph = Graph()
    setl_graph.parse('hhear-ontology.setl.ttl', format="turtle")
    cwd = os.getcwd()
    outputs = []
    for ext in ('ttl', 'owl', 'json'):
        outputs.append(
            setl_graph.resource(URIRef('file://' + cwd + '/hhear.' + ext)))
    print(len(setl_graph))
    for entry in os.listdir(HHEAR_DIR):
        # Skip anything that is not a Turtle fragment, and files whose
        # name starts with '#' (commented out).
        if entry.startswith('#') or not entry.endswith('.ttl'):
            continue
        print('Adding fragment', entry)

        piece = setl_graph.resource(BNode())
        for out_file in outputs:
            print(out_file.identifier,
                  list(out_file[prov.wasGeneratedBy]))
            out_file.value(prov.wasGeneratedBy).add(prov.used, piece)
        piece.add(RDF.type, setlr.void.Dataset)
        extract = setl_graph.resource(BNode())
        piece.add(prov.wasGeneratedBy, extract)
        extract.add(RDF.type, setl.Extract)
        extract.add(prov.used, URIRef('file://' + HHEAR_DIR + entry))

    setlr._setl(setl_graph)
示例#2
0
def build():
    setl_graph = Graph()
    setl_graph.load(SETL_FILE, format="turtle")
    cwd = os.getcwd()
    formats = ['ttl', 'owl', 'json']
    ontology_output_files = [
        setl_graph.resource(URIRef('file://' + cwd + '/hasco.' + x))
        for x in formats
    ]
    for filename in os.listdir(HASCO_DIR):
        if not filename.endswith('.ttl'):
            continue
        print 'Adding fragment', filename

        fragment = setl_graph.resource(BNode())
        for ontology_output_file in ontology_output_files:
            ontology_output_file.value(prov.wasGeneratedBy).add(
                prov.used, fragment)
        fragment.add(RDF.type, setlr.void.Dataset)
        fragment_extract = setl_graph.resource(BNode())
        fragment.add(prov.wasGeneratedBy, fragment_extract)
        fragment_extract.add(RDF.type, setl.Extract)
        fragment_extract.add(prov.used,
                             URIRef('file://' + HASCO_DIR + filename))

    print setl_graph.serialize(format="turtle")

    setlr._setl(setl_graph)
示例#3
0
    def process(self, i, o):
        query_store = self.app.db.store
        if hasattr(query_store, 'endpoint'):
            query_store = database.create_query_store(self.app.db.store)
        db_graph = rdflib.ConjunctiveGraph(store=query_store)
        db_graph.NS = self.app.NS
        setlr.actions[whyis.sparql] = db_graph
        setlr.actions[whyis.NanopublicationManager] = self.app.nanopub_manager
        setlr.actions[whyis.Nanopublication] = self.app.nanopub_manager.new
        setl_graph = i.graph
        #        setlr.run_samples = True
        resources = setlr._setl(setl_graph)
        # retire old copies
        old_np_map = {}
        to_retire = []
        for new_np, assertion, orig in self.app.db.query(
                '''select distinct ?np ?assertion ?original_uri where {
    ?np np:hasAssertion ?assertion.
    ?assertion a np:Assertion;
        prov:wasGeneratedBy/a ?setl;
        prov:wasQuotedFrom ?original_uri.
}''',
                initBindings=dict(setl=i.identifier),
                initNs=dict(prov=prov, np=np)):
            old_np_map[orig] = assertion
            to_retire.append(new_np)
            if len(to_retire) > 100:
                self.app.nanopub_manager.retire(*to_retire)
                to_retire = []
        self.app.nanopub_manager.retire(*to_retire)
        # print resources
        for output_graph in setl_graph.subjects(prov.wasGeneratedBy,
                                                i.identifier):
            print(output_graph)
            if setl_graph.resource(output_graph)[rdflib.RDF.type:whyis.
                                                 NanopublicationCollection]:
                self.app.nanopub_manager.publish(resources[output_graph])
            else:
                out = resources[output_graph]
                out_conjunctive = rdflib.ConjunctiveGraph(
                    store=out.store, identifier=output_graph)
                to_publish = []
                triples = 0
                for new_np in self.app.nanopub_manager.prepare(
                        out_conjunctive):
                    self.explain(new_np, i, o)
                    to_publish.append(new_np)

                # triples += len(new_np)
                # if triples > 10000:
                self.app.nanopub_manager.publish(*to_publish)
        for resource, obj in list(resources.items()):
            if hasattr(i, 'close'):
                print("Closing", resource)
                try:
                    i.close()
                except:
                    pass
def convert_to_rdf(json_file):
    """Run the codegraph SETL script over *json_file*.

    Returns the RDF graph produced for the ``codegraph`` output.
    """
    setl_graph = rdflib.Graph()
    setl_graph.parse(SETL_FILE, format="turtle")

    # Point the JSON extract step of the plan at the requested file.
    json_extract = setl_graph.value(
        rdflib.URIRef('http://purl.org/twc/codegraph/setl/codegraph_json'),
        prov.wasGeneratedBy)
    input_uri = rdflib.URIRef(
        'file://' + os.path.join(os.getcwd(), json_file))
    setl_graph.add((json_extract, prov.used, input_uri))

    results = setlr._setl(setl_graph)

    result_graph = results[
        rdflib.URIRef('http://purl.org/twc/codegraph/setl/codegraph')]
    print("analyzed " + json_file)
    return result_graph
示例#5
0
    def process(self, i, o):
        """Run the SETL script carried by nanopublication *i* and publish
        the resulting nanopublications one at a time.

        Nanopubs previously generated by this script are retired first;
        each new nanopub's pubinfo is linked back to its original source
        URI (prov:wasQuotedFrom) and, when one exists, to the assertion
        it replaces (prov:wasRevisionOf).

        :param i: input nanopublication; ``i.graph`` holds the SETL plan.
        :param o: output resource, forwarded to ``self.explain``.
        """
        setl_graph = i.graph
        resources = setlr._setl(setl_graph)
        # Retire old copies: every nanopub whose assertion was generated
        # by this SETL script.  Remember which original URI produced
        # which assertion so replacements can be linked as revisions.
        old_np_map = {}
        for new_np, assertion, orig in self.app.db.query(
                '''select distinct ?np ?assertion ?original_uri where {
    ?np np:hasAssertion ?assertion.
    ?assertion a np:Assertion;
        prov:wasGeneratedBy/a ?setl;
        prov:wasQuotedFrom ?original_uri.
}''',
                initBindings=dict(setl=i.identifier),
                initNs=dict(prov=prov, np=np)):
            old_np_map[orig] = assertion
            # Retired one at a time (no batching in this variant).
            self.app.nanopub_manager.retire(new_np)
        # Publish each graph generated by this SETL run.
        for output_graph in setl_graph.subjects(prov.wasGeneratedBy,
                                                i.identifier):
            out = resources[output_graph]
            out_conjunctive = rdflib.ConjunctiveGraph(store=out.store,
                                                      identifier=output_graph)
            # Filled in by prepare(); judging from its use below it maps
            # an original URI to the new assertion identifier.
            mappings = {}
            for new_np in self.app.nanopub_manager.prepare(out_conjunctive,
                                                           mappings=mappings):
                self.explain(new_np, i, o)
                print "Publishing", new_np.identifier
                # Recover the original URI(s) that mapped onto this new
                # assertion; skip nanopubs with no recorded origin.
                orig = [
                    orig for orig, new in mappings.items()
                    if new == new_np.assertion.identifier
                ]
                if len(orig) == 0:
                    continue
                orig = orig[0]
                print orig
                if isinstance(orig, rdflib.URIRef):
                    new_np.pubinfo.add((new_np.assertion.identifier,
                                        prov.wasQuotedFrom, orig))
                    # Link back to the assertion this one supersedes.
                    if orig in old_np_map:
                        new_np.pubinfo.add(
                            (new_np.assertion.identifier, prov.wasRevisionOf,
                             old_np_map[orig]))
                print "Nanopub assertion has", len(
                    new_np.assertion), "statements."
                self.app.nanopub_manager.publish(new_np)
                print 'Published'
def convert_xml(debug=None):
    """Run the nanomine XML SETL script over files in XML_DIR.

    For each XML file a copy of the base SETL plan is made, its input
    pointed at that file and a Load step added for a matching ``.trig``
    output, then the per-file plan is executed.

    :param debug: when given, process only this one filename instead of
        the first PROCESS_FILE_COUNT entries of XML_DIR.
    """
    setl_graph = Graph()
    setl_graph.load(SETL_FILE, format="turtle")
    cwd = os.getcwd()  # NOTE(review): unused in this function
    if debug:
        files = [debug]
    else:
        files = os.listdir(XML_DIR)[:PROCESS_FILE_COUNT]
    for filename in files:
        try:
            print 'Processing', filename
            # Work on a per-file copy so the base plan stays pristine.
            local_setl_graph = Graph()
            local_setl_graph += setl_graph
            input_file_resource = local_setl_graph.resource(
                URIRef('http://nanomine.tw.rpi.edu/setl/xml/nanomine_xml'))
            input_file_resource.value(prov.wasGeneratedBy).set(
                prov.used, URIRef('file://' + XML_DIR + filename))

            output_file_resource = local_setl_graph.resource(
                URIRef(OUTPUT_DIR + filename.replace('.xml', '.trig')))
            # NOTE(review): prov.used on the *output* file resource, and
            # with the unconverted .xml filename, looks suspicious --
            # confirm against the SETL vocabulary / other converters.
            output_file_resource.set(prov.used,
                                     URIRef('file://' + OUTPUT_DIR + filename))
            output_file_resource.set(dc['format'], Literal('trig'))
            output_file_resource.set(RDF.type, pv.File)
            generated_by = local_setl_graph.resource(BNode())
            output_file_resource.set(prov.wasGeneratedBy, generated_by)
            generated_by.set(RDF.type, setl.Load)
            generated_by.set(
                prov.used,
                URIRef('http://nanomine.tw.rpi.edu/setl/xml/nanopubs'))

            resources = setlr._setl(local_setl_graph)
            # Close any output graphs that support it.
            for identifier, graph in resources.items():
                if hasattr(graph, 'close'):
                    print "Closing", identifier
                    graph.close()
        except Exception as e:
            # Best effort: report the failure and continue with the
            # next file rather than aborting the whole batch.
            print e
示例#7
0
    def process(self, i, o):
        """Run the SETL script in nanopublication *i* against the app
        database and publish the generated nanopublications as a batch.

        Old nanopubs generated by this script are retired first; each new
        nanopub's pubinfo is linked back to its original source URI
        (prov:wasQuotedFrom) and, when known, to the assertion it revises
        (prov:wasRevisionOf).

        :param i: input nanopublication; ``i.graph`` holds the SETL plan.
        :param o: output resource, forwarded to ``self.explain``.
        """

        # Wrap the app store so SETL's whyis:sparql action can query the
        # live database.
        query_store = database.create_query_store(self.app.db.store)
        db_graph = rdflib.ConjunctiveGraph(store=query_store)
        db_graph.NS = self.app.NS
        setlr.actions[whyis.sparql] = db_graph
        setl_graph = i.graph
        resources = setlr._setl(setl_graph)
        # Retire old copies, remembering which original URI produced
        # which assertion so replacements can be linked as revisions.
        old_np_map = {}
        for new_np, assertion, orig in self.app.db.query(
                '''select distinct ?np ?assertion ?original_uri where {
    ?np np:hasAssertion ?assertion.
    ?assertion a np:Assertion;
        prov:wasGeneratedBy/a ?setl;
        prov:wasQuotedFrom ?original_uri.
}''',
                initBindings=dict(setl=i.identifier),
                initNs=dict(prov=prov, np=np)):
            old_np_map[orig] = assertion
            self.app.nanopub_manager.retire(new_np)
        # Publish each graph generated by this SETL run.
        for output_graph in setl_graph.subjects(prov.wasGeneratedBy,
                                                i.identifier):
            out = resources[output_graph]
            out_conjunctive = rdflib.ConjunctiveGraph(store=out.store,
                                                      identifier=output_graph)
            # Stage prepared nanopubs in an on-disk Sleepycat store.
            # NOTE(review): the temp directory is never removed and the
            # store never closed -- looks like a resource leak; confirm.
            nanopub_prepare_graph = rdflib.ConjunctiveGraph(store="Sleepycat")
            nanopub_prepare_graph_tempdir = tempfile.mkdtemp()
            nanopub_prepare_graph.store.open(nanopub_prepare_graph_tempdir,
                                             True)

            # Filled in by prepare(); judging from its use below it maps
            # an original URI to the new assertion identifier.
            mappings = {}

            to_publish = []
            triples = 0  # NOTE(review): unused leftover of an abandoned batching scheme
            for new_np in self.app.nanopub_manager.prepare(
                    out_conjunctive,
                    mappings=mappings,
                    store=nanopub_prepare_graph.store):
                self.explain(new_np, i, o)
                # Recover the original URI that mapped onto this new
                # assertion; skip nanopubs with no recorded origin.
                orig = [
                    orig for orig, new in mappings.items()
                    if new == new_np.assertion.identifier
                ]
                if len(orig) == 0:
                    continue
                orig = orig[0]
                print orig
                if isinstance(orig, rdflib.URIRef):
                    new_np.pubinfo.add((new_np.assertion.identifier,
                                        prov.wasQuotedFrom, orig))
                    # Link back to the assertion this one supersedes.
                    if orig in old_np_map:
                        new_np.pubinfo.add(
                            (new_np.assertion.identifier, prov.wasRevisionOf,
                             old_np_map[orig]))
                print "Publishing %s with %s assertions." % (
                    new_np.identifier, len(new_np.assertion))
                to_publish.append(new_np)

            # Publish the whole batch in a single call.
            self.app.nanopub_manager.publish(*to_publish)
            print 'Published'
示例#8
0
def sdd2owl(c, inputfile, ontology, outputfile, infosheet="InfoSheet"):
    """Convert a Semantic Data Dictionary workbook into an OWL ontology.

    Runs the ``sdd_owl_semantics`` SETL script over the workbook, writes
    the generated ontology to *ontology* (Turtle), then rewrites the
    Codebook tab's "Class" column with the generated ctxid terms and
    saves the workbook to *outputfile*.

    :param c: task context (unused; kept for the task-runner signature).
    :param inputfile: path of the SDD Excel workbook to read.
    :param ontology: path the generated Turtle ontology is written to.
    :param outputfile: path the updated workbook is saved to.
    :param infosheet: name of the workbook tab listing the other sheets.
    """
    # Local imports as in the original; unused `re` and `pandas`
    # imports were dropped.
    import openpyxl
    import json
    cwd = os.getcwd()

    setl_graph = Graph()
    setl_graph.parse('sdd_owl_semantics.setl.ttl', format="turtle")

    sddns = rdflib.Namespace('http://purl.org/twc/sdd/setl/')

    # SETL input tables, keyed by their InfoSheet entry name.
    tab_config = {
        "Data_Dictionary": sddns.dm_table,
        "Codebook": sddns.codebook_table,
        "Code_Mappings": sddns.codemapping_table,
        "Timeline": sddns.timeline_table
    }

    wb = openpyxl.load_workbook(inputfile)

    infosheet_tab = wb[infosheet]
    infosheet_dict = {row[0].value: row[1].value for row in infosheet_tab}

    infosheet_resource = setl_graph.resource(sddns.info_sheet)
    infosheet_resource.add(rdflib.RDF.type, setl.Excel)
    infosheet_resource.add(setl.sheetname, rdflib.Literal(infosheet))
    gen = infosheet_resource.value(prov.wasGeneratedBy)
    gen.add(prov.used, URIRef('file://' + os.path.join(cwd, inputfile)))

    for entry, uri in tab_config.items():
        res = setl_graph.resource(uri)
        sheet = infosheet_dict[entry]
        gen = res.value(prov.wasGeneratedBy)
        if sheet.startswith('#'):
            # A leading '#' marks an in-workbook sheet name; anything
            # else is treated as the URI of an external CSV table.
            res.add(rdflib.RDF.type, setl.Excel)
            res.add(setl.sheetname, rdflib.Literal(sheet[1:]))
            gen.add(prov.used,
                    URIRef('file://' + os.path.join(cwd, inputfile)))
        else:
            res.add(rdflib.RDF.type, csvw.Table)
            gen.add(prov.used, URIRef(sheet))

    # JSON-LD context handed to the transform.  BUG FIX: the original
    # literal declared "rdfs" and "dc" twice each (identical values);
    # Python silently keeps only the last occurrence, so the duplicates
    # are removed here.
    context = {
        "@base": "http://purl.org/twc/ctxid/",
        "sio": "http://semanticscience.org/resource/",
        "chear": "http://hadatac.org/ont/chear#",
        "skos": "http://www.w3.org/2004/02/skos/core#",
        "prov": "http://www.w3.org/ns/prov#",
        "dc": "http://purl.org/dc/terms/",
        "cmo": "http://purl.obolibrary.org/obo/CMO_",
        "doid": "http://purl.obolibrary.org/obo/DOID_",
        "owl": "http://www.w3.org/2002/07/owl#",
        "rdfs": "http://www.w3.org/2000/01/rdf-schema#",
        "chebi": "http://purl.obolibrary.org/obo/CHEBI_",
        "stato": "http://purl.obolibrary.org/obo/STATO_",
        "obo": "http://purl.obolibrary.org/obo/",
        "pubchem": "http://rdf.ncbi.nlm.nih.gov/pubchem/compound/",
        "hasco": "http://hadatac.org/ont/hasco#",
        "vstoi": "http://hadatac.org/ont/vstoi#",
        "hasneto": "http://hadatac.org/ont/hasneto#",
        "uberon": "http://purl.obolibrary.org/obo/UBERON_",
        "prv": "http://hadatac.org/ont/prov#"
    }

    namespaces = load_namespaces()
    context.update(namespaces)
    setl_graph.add((sddns.metadata_transform, setl.hasContext,
                    Literal(json.dumps(context))))

    setl_graph.add((sddns.namespaces, prov.value,
                    Literal("result = %s" % json.dumps(context))))

    resources = setlr._setl(setl_graph)

    ont = resources[sddns.metadata]

    prefix = "http://purl.org/twc/ctxid/"

    # (column, code) -> curie of the generated class; used below to
    # rewrite the Codebook's "Class" column.
    mappings = {}

    for cls, identifier in ont.query('''select ?cls ?identifier where {
        ?cls a owl:Class; dc:identifier ?identifier.
    }''',
                                     initNs={
                                         "owl": rdflib.OWL,
                                         "dc": dc
                                     }):
        if prefix in str(cls):
            # The local name is a hex context id; record its integer
            # form as skos:notation.  (Renamed from `id`, which
            # shadowed the builtin.)
            hex_id = cls.replace(prefix, '')
            ont.add((cls, skos.notation, rdflib.Literal(int(hex_id, 16))))
            column, code = identifier.split('||||')
            print("Mapping (%s, %s) => %s" % (column, code, cls))
            mappings[(column, code)] = "ctxid:" + hex_id
    with open(ontology, 'wb') as o:
        ont.serialize(o, format="turtle")

    # The Codebook InfoSheet entry is expected to be an in-workbook
    # sheet name ('#Name'); strip the leading '#'.
    codebook = wb[infosheet_dict['Codebook'][1:]]

    header = None
    code_col = None
    class_col = None
    column_col = None
    for row in codebook.rows:
        if header is None:
            # First row is the header: locate the columns of interest.
            # NOTE(review): assumes openpyxl >= 2.6, where cell.column
            # is a 1-based integer -- confirm the pinned version.
            header = row
            for cell in header:
                if cell.value == "Code":
                    code_col = cell.column - 1
                if cell.value == "Class":
                    class_col = cell.column - 1
                if cell.value == "Column":
                    column_col = cell.column - 1
            print('Found Column Headers: Code==%s Class==%s Column=%s' %
                  (code_col, class_col, column_col))
            continue
        if row[column_col].value is None:
            continue
        column = str(row[column_col].value)
        code = str(row[code_col].value)
        if (column, code) in mappings:
            print("%s.%s = %s" % (column, code, mappings[(column, code)]))
            row[class_col].value = mappings[(column, code)]

    wb.save(outputfile)