Example #1
class NSManager:
    def __init__(self, ns_dict):
        """ TODO: check ns_dict """
        self._ns_dict = ns_dict
        self._rdflib_ns_manager = None

    def __getitem__(self, key):
        return self._ns_dict[key]

    def __getattr__(self, key):
        try:
            return self._ns_dict[key]
        except KeyError:
            raise AttributeError()

    def add_namespace(self, prefix, namespace):
        """ TODO: check prefix and namespace """
        if prefix in self._ns_dict:
            raise AlreadyRegisteredNSError(prefix)
        self._ns_dict[prefix] = namespace

    @property
    def ns_dict(self):
        return self._ns_dict

    @property
    def rdflib_ns_manager(self):
        """ For using prefixes in RDFlib graphs """
        if self._rdflib_ns_manager is None:
            self._rdflib_ns_manager = NamespaceManager(Graph())
            for namesp in self._ns_dict:
                self._rdflib_ns_manager.bind(namesp, self._ns_dict[namesp])

        return self._rdflib_ns_manager
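
A minimal usage sketch for the NSManager above (the prefixes are hypothetical; AlreadyRegisteredNSError is assumed to be defined alongside the class):

from rdflib import Namespace

ns = NSManager({'foaf': Namespace('http://xmlns.com/foaf/0.1/')})
print(ns['foaf'])   # lookup via __getitem__
print(ns.foaf)      # same lookup via __getattr__
ns.add_namespace('dc', Namespace('http://purl.org/dc/elements/1.1/'))
# ns.add_namespace('foaf', ...) would raise AlreadyRegisteredNSError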
Example #2
File: graph.py Project: RDFLib/rdfextras
    def canonicalTerm(self, term):

        if isinstance(term, URIRef):

            if self.prolog is not None:
                namespace_manager = NamespaceManager(Graph())

                for prefix,uri in self.prolog.prefixBindings.items():
                    namespace_manager.bind(prefix, uri, override=False)

                try:
                    prefix,uri,localName = namespace_manager.compute_qname(term)
                except Exception:
                    return term

                if prefix not in self.prolog.prefixBindings:
                    return term
                else:
                    return u':'.join([prefix, localName])

            else:
                return term

        elif isinstance(term, Literal):
            return term.n3()

        elif isinstance(term, BNode):
            return term.n3()

        else:
            assert isinstance(term, Variable)
            return term.n3()
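
For reference, a standalone sketch of the compute_qname call this method depends on; given a bound prefix, it splits a URI into a (prefix, namespace, local name) triple (the foaf binding is hypothetical):

from rdflib import Graph, URIRef
from rdflib.namespace import NamespaceManager

nm = NamespaceManager(Graph())
nm.bind('foaf', 'http://xmlns.com/foaf/0.1/')
prefix, uri, localName = nm.compute_qname(URIRef('http://xmlns.com/foaf/0.1/name'))
print(u':'.join([prefix, localName]))  # foaf:name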
Example #3
File: ajax.py Project: suchmaske/rdfedit
def serialize_graph(request, rdfjson, base):

    editgraph = Graph()
    editgraph.parse(data=rdfjson, format="rdf-json")

    namespace_manager = NamespaceManager(Graph())
    for ns in namespaces_dict:
        namespace_manager.bind(ns, Namespace(namespaces_dict[ns]), override=False)

    editgraph.namespace_manager = namespace_manager

    if base:
        """
        RDFLib Module to insert the base during serialization is buggy. Manual insertion needed
        graphxml_string = editgraph.serialize(format="pretty-xml", base=base)
        """
        graphxml_string = editgraph.serialize(format="pretty-xml").decode('utf-8', 'ignore')
        graphxml_string = graphxml_string.replace('rdf:RDF\n', 'rdf:RDF\n  xml:base="' + base +'"\n')
        # print graphxml_string
    else:
        graphxml_string = editgraph.serialize(format="pretty-xml")

    graphxml_to_db = RDF_XML(rdfxml_string = graphxml_string)
    graphxml_to_db.save()
    print(graphxml_to_db.id)

    return json.dumps({'message':graphxml_to_db.id}) 
Example #4
    def init_database(self):
        """ Open the configured database """
        self._init_rdf_graph()
        L.debug("opening " + str(self.source))
        try:
            self.source.open()
        except OpenFailError as e:
            L.error('Failed to open the data source because: %s', e)
            raise

        nm = NamespaceManager(self['rdf.graph'])
        self['rdf.namespace_manager'] = nm
        self['rdf.graph'].namespace_manager = nm

        # A runtime version number for the graph should update for all changes
        # to the graph
        self['rdf.graph.change_counter'] = 0

        self['rdf.graph'].store.dispatcher.subscribe(TripleAddedEvent, self._context_changed_handler())
        self['rdf.graph'].store.dispatcher.subscribe(TripleRemovedEvent, self._context_changed_handler())

        self['rdf.graph']._add = self['rdf.graph'].add
        self['rdf.graph']._remove = self['rdf.graph'].remove
        self['rdf.graph'].add = self._my_graph_add
        self['rdf.graph'].remove = self._my_graph_remove
        nm.bind("", self['rdf.namespace'])
Example #5
File: Network.py Project: carnotip/FuXi
    def __init__(self,ruleStore,name = None,
                 initialWorkingMemory = None,
                 inferredTarget = None,
                 nsMap = {},
                 graphVizOutFile=None,
                 dontFinalize=False,
                 goal=None):
        self.leanCheck = {}
        self.goal = goal
        self.nsMap = nsMap
        self.name = name and name or BNode()
        self.nodes = {}
        self.alphaPatternHash = {}
        self.ruleSet = set()
        for alphaPattern in xcombine(('1','0'),('1','0'),('1','0')):
            self.alphaPatternHash[tuple(alphaPattern)] = {}
        if inferredTarget is None:
            self.inferredFacts = Graph()
            namespace_manager = NamespaceManager(self.inferredFacts)
            for k,v in nsMap.items():
                namespace_manager.bind(k, v)
            self.inferredFacts.namespace_manager = namespace_manager
        else:
            self.inferredFacts = inferredTarget
        self.workingMemory = initialWorkingMemory and initialWorkingMemory or set()
        self.proofTracers = {}
        self.terminalNodes  = set()
        self.instantiations = {}
        start = time.time()
        self.ruleStore=ruleStore
        self.justifications = {}
        self.dischargedBindings = {}
        if not dontFinalize:
            self.ruleStore._finalize()
        self.filteredFacts = Graph()

        #'Universal truths' for a rule set are rules where the LHS is empty.
        # Rather than automatically adding them to the working set, alpha nodes are 'notified'
        # of them, so they can be checked for while performing inter element tests.
        self.universalTruths = []
        from FuXi.Horn.HornRules import Ruleset
        self.rules=set()
        self.negRules = set()
        for rule in Ruleset(n3Rules=self.ruleStore.rules,nsMapping=self.nsMap):
            import warnings
            warnings.warn(
          "Rules in a network should be built *after* construction via "+
          " self.buildNetworkClause(HornFromN3(n3graph)) for instance",
                          DeprecationWarning,2)
            self.buildNetworkFromClause(rule)
        self.alphaNodes = [node for node in self.nodes.values() if isinstance(node,AlphaNode)]
        self.alphaBuiltInNodes = [node for node in self.nodes.values() if isinstance(node,BuiltInAlphaNode)]
        self._setupDefaultRules()
        if initialWorkingMemory:
            start = time.time()
            self.feedFactsToAdd(initialWorkingMemory)
            print >>sys.stderr,"Time to calculate closure on working memory: %s m seconds"%((time.time() - start) * 1000)
        if graphVizOutFile:
            print >>sys.stderr,"Writing out RETE network to ", graphVizOutFile
            renderNetwork(self,nsMap=nsMap).write(graphVizOutFile)
Example #6
    def serialize(self, add, delete):

        commit = Namespace("urn:commit:" + str(uuid.uuid1()) + ":")
        eccrev = Namespace("https://vocab.eccenca.com/revision/")

        g = ConjunctiveGraph()
        namespace_manager = NamespaceManager(g)
        namespace_manager.bind('eccrev', eccrev, override=False)

        g.add((commit.term(""), RDF.type, eccrev.Commit))

        graphUris = set(delete.keys()) | set(add.keys())

        for graphUri in graphUris:
            if (graphUri in delete.keys() and len(delete[graphUri]) > 0) or (graphUri in add.keys() and len(add[graphUri]) > 0):
                revision = Namespace("urn:revision:" + str(uuid.uuid1()) + ":")
                g.add((commit.term(""), eccrev.hasRevision, revision.term("")))
                g.add((revision.term(""), RDF.type, eccrev.Revision))
                if str(graphUri) != 'http://quitdiff.default/':
                    g.add((revision.term(""), eccrev.hasRevisionGraph, graphUri))
                if graphUri in delete.keys() and len(delete[graphUri]) > 0:
                    deleteGraphName = revision.term(":delete")
                    g.add((revision.term(""), eccrev.deltaDelete, deleteGraphName))
                    for triple in delete[graphUri]:
                        g.add(triple + (deleteGraphName,))
                if graphUri in add.keys() and len(add[graphUri]) > 0:
                    insertGraphName = revision.term(":insert")
                    g.add((revision.term(""), eccrev.deltaInsert, insertGraphName))
                    for triple in add[graphUri]:
                        g.add(triple + (insertGraphName,))

        return g.serialize(format="trig").decode("utf-8")
Example #7
def load_graph_prefixes():
    namespace_manager = NamespaceManager(Graph())

    # restPrefix = Namespace('http://restaurants.recommender.es/od-data/restaurant/')
    # locPrefix = Namespace('http://restaurants.recommender.es/od-data/location/')
    # ratePrefix = Namespace('http://restaurants.recommender.es/od-data/rate/')
    # contPrefix = Namespace('http://restaurants.recommender.es/od-data/contact/')
    #
    # namespace_manager.bind('rest', restPrefix)
    # namespace_manager.bind('loc', locPrefix)
    # namespace_manager.bind('rate', ratePrefix)
    # namespace_manager.bind('cont', contPrefix)

    tree = ET.parse('metadata.xml')
    root = tree.getroot()

    prefixes = root.find("prefixes")

    for prefix in prefixes:
        namespace = Namespace(prefix.find('namespace').text)
        prefix_name = prefix.get('name')

        namespace_manager.bind(prefix_name, namespace)

    return namespace_manager
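
The layout of metadata.xml is implied by the parsing code: a <prefixes> element whose <prefix> children carry a name attribute and a <namespace> child element. A self-contained sketch with a document of that assumed shape (the namespace URI is taken from the commented-out bindings above):

import xml.etree.ElementTree as ET
from rdflib import Graph, Namespace
from rdflib.namespace import NamespaceManager

doc = """<metadata>
  <prefixes>
    <prefix name="rest">
      <namespace>http://restaurants.recommender.es/od-data/restaurant/</namespace>
    </prefix>
  </prefixes>
</metadata>"""

root = ET.fromstring(doc)
namespace_manager = NamespaceManager(Graph())
for prefix in root.find('prefixes'):
    namespace_manager.bind(prefix.get('name'), Namespace(prefix.find('namespace').text))
print(list(namespace_manager.namespaces()))  # includes 'rest' alongside rdflib's default bindings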
Example #8
    def _create_or_get_graph(self,name):
        if name not in self.models:
            graph = Graph()
            namespace_manager = NamespaceManager(Graph())
            namespace_manager.bind(DEFAULT_NAMESPACE[0], self.default_ns)
            graph.ns_manager = namespace_manager
            self.models[name] = graph

        return self.models[name]
Example #9
    def openDatabase(self):
        """ Open a the configured database """
        self._init_rdf_graph()
        L.debug("opening " + str(self.source))
        self.source.open()
        nm = NamespaceManager(self['rdf.graph'])
        self['rdf.namespace_manager'] = nm
        self['rdf.graph'].namespace_manager = nm

        nm.bind("", self['rdf.namespace'])
Example #10
File: Util.py Project: drewp/FuXi
def renderNetwork(network, nsMap={}):
    """
    Takes an instance of a compiled ReteNetwork and a namespace mapping (for
    constructing QNames for rule pattern terms) and returns a BGL Digraph
    instance representing the Rete network (from which GraphViz diagrams can
    be generated).
    """
    # from FuXi.Rete import BuiltInAlphaNode
    # from BetaNode import LEFT_MEMORY, RIGHT_MEMORY, LEFT_UNLINKING
    dot = Dot(graph_type='digraph')
    namespace_manager = NamespaceManager(Graph())
    for prefix, uri in list(nsMap.items()):
        namespace_manager.bind(prefix, uri, override=False)

    visitedNodes = {}
    edges = []
    idx = 0
    for node in list(network.nodes.values()):
        if node not in visitedNodes:
            idx += 1
            visitedNodes[node] = generateBGLNode(
                dot, node, namespace_manager, str(idx))
            dot.add_node(visitedNodes[node])
    nodeIdxs = {}
    for node in list(network.nodes.values()):
        for mem in node.descendentMemory:
            if not mem:
                continue
            bNode = mem.successor
        for bNode in node.descendentBetaNodes:
            for idx, otherNode in enumerate([bNode.leftNode, bNode.rightNode]):
                if node == otherNode and (node, otherNode) not in edges:
                    for i in [node, bNode]:
                        if i not in visitedNodes:
                            idx += 1
                            nodeIdxs[i] = idx
                            visitedNodes[i] = generateBGLNode(
                                dot, i, namespace_manager, str(idx))
                            dot.add_node(visitedNodes[i])
                    edge = Edge(visitedNodes[node],
                                visitedNodes[bNode],
                                label=idx == 0 and 'left' or 'right')
                    dot.add_edge(edge)
                    edges.append((node, bNode))

    return dot
Example #11
File: __init__.py Project: RDFLib/FuXi
    def testExpand(self):
        EX = Namespace("http://example.com/")
        namespace_manager = NamespaceManager(Graph())
        namespace_manager.bind('ex', EX, override=False)
        self.testGraph.namespace_manager = namespace_manager

        man = Class(EX.Man)
        boy = Class(EX.Boy)
        woman = Class(EX.Woman)
        girl = Class(EX.Girl)
        male = Class(EX.Male)
        female = Class(EX.Female)
        human = Class(EX.Human)
        animal = Class(EX.Animal)
        cat = Class(EX.Cat)
        dog = Class(EX.Dog)
        animal = Class(EX.Animal)

        animal = cat | dog | human
        human += man
        human += boy
        human += woman
        human += girl
        male += man
        male += boy
        female += woman
        female += girl

        testClass = human & ~ female
        self.assertEqual(repr(testClass), 'ex:Human THAT ( NOT ex:Female )')
        newtestClass = ComplementExpansion(testClass, debug=True)
        self.assertTrue(repr(newtestClass) in [
            '( ex:Boy or ex:Man )',
            '( ex:Man or ex:Boy )'],
            repr(newtestClass))

        testClass2 = animal & ~ (male | female)
        self.assertEqual(repr(testClass2),
                         '( ( ex:Cat or ex:Dog or ex:Human ) and ( not ( ex:Male or ex:Female ) ) )')
        newtestClass2 = ComplementExpansion(testClass2, debug=True)
        testClass2Repr = repr(newtestClass2)
        self.assertTrue(testClass2Repr in [
            '( ex:Cat or ex:Dog )',
            '( ex:Dog or ex:Cat )'],
            testClass2Repr)
Example #12
File: data.py Project: gsarma/PyOpenWorm
    def init_database(self):
        """ Open the configured database """
        self._init_rdf_graph()
        L.debug("opening " + str(self.source))
        self.source.open()
        nm = NamespaceManager(self['rdf.graph'])
        self['rdf.namespace_manager'] = nm
        self['rdf.graph'].namespace_manager = nm

        # A runtime version number for the graph should update for all changes
        # to the graph
        self['rdf.graph.change_counter'] = 0

        self['rdf.graph']._add = self['rdf.graph'].add
        self['rdf.graph']._remove = self['rdf.graph'].remove
        self['rdf.graph'].add = self._my_graph_add
        self['rdf.graph'].remove = self._my_graph_remove
        nm.bind("", self['rdf.namespace'])
Example #13
    def dataset(self):
        # pdb.set_trace()
        if hasattr(self._connection, 'dataset'):
            return getattr(self._connection, 'dataset')
        if self.store == 'Sleepycat':
            dataset = Dataset(store=self.store, default_union=True)
            dataset.open(self.store_path, create=True)
        else:
            self.store = Virtuoso(self.connection)
            # dataset = Dataset(store=self.store, default_union=True)
            dataset = ConjunctiveGraph(store=self.store, identifier=CENDARI)
            self.store.connection  # force connection
        setattr(self._connection, 'dataset', dataset)
        nm = NamespaceManager(dataset)
        for prefix, ns in INIT_NS.items():
            nm.bind(prefix, ns)
        dataset.namespace_manager = nm
        return dataset
Example #14
def newgraph(request):
    print(request.method)

    # Create and bind namespaces
    namespace_manager = NamespaceManager(Graph())
    for ns in namespaces_dict:
        namespace_manager.bind(ns, Namespace(namespaces_dict[ns]))

    # Create a new graph
    graph = Graph()
    graph.namespace_manager = namespace_manager

    triple_list = []
    subject_list = []
    predicate_list = []

    subject_set = {}
    predicate_set = {}
    object_set = {}

    # Determine xml:base
    subject_base_test_set = {triple[0] for triple in triple_list}
    base_set = {subject[:subject.rfind("/")] for subject in subject_base_test_set}
    # If all subjects share the same substring-base, this substring-base is likely to be the xml:base.
    if len(base_set) == 1:
        base = str(list(base_set)[0]) + "/"
    else:
        base = ""

    # Serialize graph
    rdfjson = graph.serialize(None, format="rdf-json")

    # 
    triple_fetcher_classes = get_triple_fetcher_classes()

    response = render_to_response('rdfedit/triples.html',
                                  {'rdfjson': rdfjson, 'triple_list': triple_list, 'subject_set': subject_set,
                                   'predicate_set': predicate_set, 'object_set': object_set,
                                   'namespaces_dict': json.dumps(namespaces_dict), 'base': base,
                                   'triple_fetcher_classes': triple_fetcher_classes},
                                  context_instance=RequestContext(request))

    return response
Example #15
File: main.py Project: irl/womaas
def tobj(objname):
    SCHEMA = Namespace('http://schema.org/')
    SPDX = Namespace('http://www.spdx.org/rdf/terms#')
    n = NamespaceManager(Graph())
    n.bind("schema", SCHEMA)
    n.bind("spdx", SPDX)
    c = get_db().cursor()
    c.execute('SELECT * FROM objects WHERE id=?', (objname,))
    obj = c.fetchone()
    g = Graph()
    g.namespace_manager = n
    objuri = URIRef("http://localhost:5000/b/" + obj[0])
    robjuri = URIRef("http://localhost:5000/r/" + obj[0])
    md5node = BNode()
    g.add((md5node, SPDX.checksumValue, Literal(obj[2])))
    g.add((md5node, SPDX.algorithm, URIRef("http://packages.qa.debian.org/#checksumAlgorithm_md5sum")))
    g.add((objuri, SPDX.checksum, md5node))
    g.add((objuri, SCHEMA.fileSize, Literal(obj[1])))
    return Response(g.serialize(format="turtle"), mimetype="text/plain")
Example #16
def load_ontology():
    from FuXi.Horn.HornRules import HornFromN3
    from FuXi.Rete.Util import generateTokenSet
    from FuXi.Rete.RuleStore import SetupRuleStore
    rule_store, rule_graph, network = SetupRuleStore(makeNetwork=True)
    for rule in HornFromN3('rdfs-rules.n3'):
        network.buildNetworkFromClause(rule)
    # for rule in HornFromN3('owl-rules.n3'):
    #     network.buildNetworkFromClause(rule)
    g = Graph(identifier='http://catalyst-fp8.eu/ontology')
    npm = NamespaceManager(g)
    g.namespace_manager = npm
    for name in ('SIOC', 'OA', 'CATALYST', 'IDEA', 'IBIS', 'VOTE', 'VERSION', 'ASSEMBL', 'OWL', 'RDF', 'RDFS', 'XSD'):
        npm.bind(name.lower(), globals()[name])
    for f in ontology_files:
        g.parse(join(dirname(__file__), f), format='turtle')
    network.feedFactsToAdd(generateTokenSet(g))
    for n in network.inferredFacts.triples((None, None, None)):
        g.add(n)
    return g
Example #17
File: rdfdb.py Project: t00m/KB4IT
    def __init__(self, path=None):
        """
        If not path is passed it build a graph in memory. Otherwise, it
        creates a persistent graph in disk.
        """
        if path is not None:
            # Create persistent Graph in disk
            self.path = path
            self.graph = ConjunctiveGraph('Sleepycat', URIRef("kb4it://"))
            graph_path = path + SEP + 'kb4it.graph'
            self.graph.store.open(graph_path)
        else:
            # Create Graph in Memory
            self.graph = ConjunctiveGraph('IOMemory')

        # Assign namespaces to the Namespace Manager of this graph
        namespace_manager = NamespaceManager(ConjunctiveGraph())
        for ns in NSBINDINGS:
            namespace_manager.bind(ns, NSBINDINGS[ns])
        self.graph.namespace_manager = namespace_manager
Example #18
class Prologue:

    """
    A class for holding prefixing bindings and base URI information
    """

    def __init__(self):
        self.base = None
        self.namespace_manager = NamespaceManager(
            Graph())  # ns man needs a store

    def resolvePName(self, prefix, localname):
        ns = self.namespace_manager.store.namespace(prefix or "")
        if ns is None:
            raise Exception('Unknown namespace prefix : %s' % prefix)
        return URIRef(ns + (localname or ""))

    def bind(self, prefix, uri):
        self.namespace_manager.bind(prefix, uri, replace=True)

    def absolutize(self, iri):

        """
        Apply BASE / PREFIXes to URIs
        (and to datatypes in Literals)

        TODO: Move resolving URIs to pre-processing
        """

        if isinstance(iri, CompValue):
            if iri.name == 'pname':
                return self.resolvePName(iri.prefix, iri.localname)
            if iri.name == 'literal':
                return Literal(
                    iri.string, lang=iri.lang,
                    datatype=self.absolutize(iri.datatype))
        elif isinstance(iri, URIRef) and not ':' in iri:
            return URIRef(iri, base=self.base)

        return iri
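
A short sketch of how this Prologue resolves prefixed names once a binding is registered; bindings live in the NamespaceManager's backing store, which resolvePName queries (the ex prefix is hypothetical):

from rdflib import Namespace

p = Prologue()
p.bind('ex', Namespace('http://example.com/'))
print(p.resolvePName('ex', 'thing'))  # http://example.com/thing
# p.resolvePName('nope', 'thing') would raise "Unknown namespace prefix : nope"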
Example #19
def main(labeled, wid_title_mapping, processed_out, discarded_out, dataset, format,
         resource_namespace, fact_namespace, ontology_namespace):

    # Namespace prefixes for RDF serialization
    RESOURCE_NS = Namespace(resource_namespace)
    FACT_EXTRACTION_NS = Namespace(fact_namespace)
    ONTOLOGY_NS = Namespace(ontology_namespace)
    NAMESPACE_MANAGER = NamespaceManager(Graph())
    NAMESPACE_MANAGER.bind('resource', RESOURCE_NS)
    NAMESPACE_MANAGER.bind('fact', FACT_EXTRACTION_NS)
    NAMESPACE_MANAGER.bind('ontology', ONTOLOGY_NS)

    mapping = json.load(wid_title_mapping)
    with codecs.open(labeled, 'rb', 'utf8') as f:
        labeled = json.load(f)

    processed, discarded = to_assertions(labeled, mapping, NAMESPACE_MANAGER, {
                                            'ontology': ONTOLOGY_NS,
                                            'resource': RESOURCE_NS,
                                            'fact_extraction': FACT_EXTRACTION_NS,
                                         }, outfile=dataset, format=format)
    with codecs.open(processed_out, 'wb', 'utf8') as f:
        f.writelines('\n'.join(processed))

    with codecs.open(discarded_out, 'wb', 'utf8') as f:
        f.writelines('\n'.join(discarded))
Example #20
    def serialize(self, add, delete):
        diff = Namespace("http://topbraid.org/diff#")

        g = ConjunctiveGraph()

        namespace_manager = NamespaceManager(g)
        namespace_manager.bind('diff', diff, override=False)
        namespace_manager.bind('owl', OWL, override=False)

        graphUris = set(delete.keys()) | set(add.keys())

        for graphUri in graphUris:
            if (graphUri in delete.keys() and len(delete[graphUri]) > 0) or (graphUri in add.keys() and len(add[graphUri]) > 0):
                changeset = Namespace("urn:diff:" + str(uuid.uuid1()))
                graphTerm = changeset.term("")
                if str(graphUri) != 'http://quitdiff.default/':
                    g.add((graphTerm, OWL.imports, graphUri, graphTerm))
                g.add((graphTerm, RDF.type, OWL.Ontology, graphTerm))
                g.add((graphTerm, OWL.imports, diff.term(""), graphTerm))
                if graphUri in delete.keys() and len(delete[graphUri]) > 0:
                    i = 0
                    for triple in delete[graphUri]:
                        deleteStatementName = BNode()
                        g.add((deleteStatementName, RDF.type, diff.DeletedTripleDiff, graphTerm))
                        g.add((deleteStatementName, RDF.subject, triple[0], graphTerm))
                        g.add((deleteStatementName, RDF.predicate, triple[1], graphTerm))
                        g.add((deleteStatementName, RDF.object, triple[2], graphTerm))
                        i += 1
                if graphUri in add.keys() and len(add[graphUri]) > 0:
                    i = 0
                    for triple in add[graphUri]:
                        insertGraphName = BNode()
                        g.add((insertGraphName, RDF.type, diff.AddedTripleDiff, graphTerm))
                        g.add((insertGraphName, RDF.subject, triple[0], graphTerm))
                        g.add((insertGraphName, RDF.predicate, triple[1], graphTerm))
                        g.add((insertGraphName, RDF.object, triple[2], graphTerm))
                        i += 1

        return g.serialize(format="trig").decode("utf-8")
Example #21
class Prologue(object):
    """
    A class for holding prefixing bindings and base URI information
    """
    def __init__(self):
        self.base = None
        self.namespace_manager = NamespaceManager(
            Graph())  # ns man needs a store

    def resolvePName(self, prefix, localname):
        ns = self.namespace_manager.store.namespace(prefix or "")
        if ns is None:
            raise Exception("Unknown namespace prefix : %s" % prefix)
        return URIRef(ns + (localname or ""))

    def bind(self, prefix, uri):
        self.namespace_manager.bind(prefix, uri, replace=True)

    def absolutize(self, iri):
        """
        Apply BASE / PREFIXes to URIs
        (and to datatypes in Literals)

        TODO: Move resolving URIs to pre-processing
        """

        if isinstance(iri, CompValue):
            if iri.name == "pname":
                return self.resolvePName(iri.prefix, iri.localname)
            if iri.name == "literal":
                return Literal(iri.string,
                               lang=iri.lang,
                               datatype=self.absolutize(iri.datatype))
        elif isinstance(iri, URIRef) and not ":" in iri:
            return URIRef(iri, base=self.base)

        return iri
Example #22
class QNameManager(object):
    def __init__(self, nsDict=None):
        self.nsDict = nsDict and nsDict or {}
        self.nsMgr = NamespaceManager(Graph())
        self.nsMgr.bind('owl', 'http://www.w3.org/2002/07/owl#')
        self.nsMgr.bind('math', 'http://www.w3.org/2000/10/swap/math#')

    def bind(self, prefix, namespace):
        self.nsMgr.bind(prefix, namespace)
Example #23
class QNameManager(object):
    def __init__(self,nsDict=None):
        self.nsDict = nsDict and nsDict or {}
        self.nsMgr = NamespaceManager(Graph())
        self.nsMgr.bind('owl','http://www.w3.org/2002/07/owl#')
        self.nsMgr.bind('math','http://www.w3.org/2000/10/swap/math#')

    def bind(self,prefix,namespace):
        self.nsMgr.bind(prefix,namespace)
Example #24
def get_graph():
    namespace_manager = NamespaceManager(Graph())
    namespace_manager.bind('owl', OWL_NS, override = False)
    namespace_manager.bind('swo', swoNs, override = False)
    namespace_manager.bind('cas', casNs, override = False)
    namespace_manager.bind('dcat', dcatNs, override = False)
    g = Graph()
    g.namespace_manager = namespace_manager
    return g
Example #25
    def canonicalTerm(self, term):
        if isinstance(term, URIRef):
            if self.prolog is not None:
                namespace_manager = NamespaceManager(Graph())
                for prefix, uri in self.prolog.prefixBindings.items():
                    namespace_manager.bind(prefix, uri, override=False)
                try:
                    prefix, uri, localName = namespace_manager.compute_qname(
                        term)
                except Exception:
                    return term
                if prefix not in self.prolog.prefixBindings:
                    return term
                else:
                    return u':'.join([prefix, localName])
            else:
                return term
        elif isinstance(term, Literal):
            return term.n3()
        elif isinstance(term, BNode):
            return term.n3()
        else:
            assert isinstance(term, Variable)
            return term.n3()
Example #26
def write_to_turtle(news_results, country_str):
    
    g = Graph()
    namespace_manager = NamespaceManager(Graph())
    n_dbpedia_res = Namespace("http://dbpedia.org/resource/")
    n_custom_ontology = Namespace("http://www.semanticweb.org/sws/group4/ontology/")
    n_custom_resources = Namespace("http://www.semanticweb.org/sws/group4/resources/")

    namespace_manager.bind('dbp', n_dbpedia_res, override=False)
    namespace_manager.bind('swo', n_custom_ontology, override=False)
    namespace_manager.bind('sws', n_custom_resources, override=False)
    g.namespace_manager = namespace_manager

    country = n_dbpedia_res[country_str.replace(" ", "_")]

    for news_entity in news_results:
        # article = BNode()
        hash_input = news_entity['url'] + news_entity['title'] + news_entity['publication_date']
        # generate 15 digit hash for name
        news_id = int(hashlib.sha256(hash_input.encode('utf-8')).hexdigest(), 16) % 10**15
        article = URIRef(n_custom_resources['Article-' + str(news_id)])

        g.add((article, RDF.type, n_custom_ontology['NewsArticle']))

        # data properties
        g.add((article,n_custom_ontology['origin'], Literal(news_entity['host'])))
        g.add((article, n_custom_ontology['sourceUrl'], Literal(news_entity['url'])))
        g.add((article, n_custom_ontology['publicationDate'], Literal(news_entity['publication_date'], datatype=XSD.dateTime)))
        g.add((article, n_custom_ontology['title'], Literal(news_entity['title'])))
        if news_entity['sentiment']:
            g.add((article, n_custom_ontology['sentiment'], Literal(news_entity['sentiment'])))

        # object properties
        g.add((article, n_custom_ontology['mentionsCountry'], country))

        # blank node for related resources
        for related_res in news_entity['related_res']:
            # rel = BNode()
            # generate 15 digit hash for news article mention
            s = str(related_res[1]) + str(related_res[0]) + str(news_id)
            mention_id = int(hashlib.sha256(s.encode('utf-8')).hexdigest(), 16) % 10 ** 15

            rel = URIRef(n_custom_resources['ArticleMention-' + str(mention_id)])

            g.add((rel, RDF.type, n_custom_ontology['ArticleMention']))
            g.add((rel, n_custom_ontology['relevance'], Literal(related_res[1])))
            g.add((rel, n_custom_ontology['mentionsResource'], URIRef(related_res[0])))

            g.add((article, n_custom_ontology['hasMention'], rel))


    # write to output file
    g.serialize(destination=f'ttl/{country_str.replace(" ","_")}.ttl', format='turtle')
Example #27
    def _configure_namespaces(self):
        """
        Loads all the registered namespaces from the configuration file at
        `./config.json` and registers the namespaces and their prefixes in the
        Graph.

        :rtype: None
        """
        self.ns = {}
        namespaces = dcat_config['namespaces']

        for prefix, namespace in namespaces.items():
            self.ns[prefix] = Namespace(namespace)

        ns_manager = NamespaceManager(self.graph)
        for prefix, namespace in self.ns.items():
            ns_manager.bind(prefix.lower(), namespace, override=True)
        self.graph.namespace_manager = ns_manager
Example #28
File: prefixes.py Project: wetneb/pynif
class NIFPrefixes:
    
    def __init__(self):
        self.manager = NamespaceManager(Graph())
        self.manager.bind("xsd", XSD)
        self.manager.bind("itsrdf", ITSRDF)
        self.manager.bind("nif", NIF)
        
        self._XSD = '@prefix xsd:   <http://www.w3.org/2001/XMLSchema#> .\n'
        self._ITSRDF = '@prefix itsrdf: <http://www.w3.org/2005/11/its/rdf#> .\n'
        self._NIF = '@prefix nif:   <http://persistence.uni-leipzig.org/nlp2rdf/ontologies/nif-core#> .\n'

    @property
    def turtle(self):
        return self._XSD + self._ITSRDF + self._NIF
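
A minimal usage sketch, assuming ITSRDF and NIF are Namespace constants defined elsewhere in the module (XSD comes from rdflib):

prefixes = NIFPrefixes()
print(prefixes.turtle)
# @prefix xsd:   <http://www.w3.org/2001/XMLSchema#> .
# @prefix itsrdf: <http://www.w3.org/2005/11/its/rdf#> .
# @prefix nif:   <http://persistence.uni-leipzig.org/nlp2rdf/ontologies/nif-core#> .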
Example #29
    def deflate(self, model_object, props, rels):
        """Overrides the `BaseSerializer` method to add graph logic."""
        namespace_manager = NamespaceManager(self.g)
        namespace_manager.bind(Ori.prefix, Namespace(Ori.uri), override=False)
        namespace_manager.bind(model_object.prefix,
                               Namespace(model_object.uri),
                               override=False)

        s = URIRef('{}{}'.format(Ori.uri, model_object.get_ori_identifier()))
        p = URIRef('{}type'.format(Rdf.uri))
        o = URIRef(self.uri_format(model_object))
        self.g.add((
            s,
            p,
            o,
        ))

        for name, definition in model_object.definitions(props=props,
                                                         rels=rels):
            value = model_object.values.get(name, None)
            if value:
                p = URIRef(self.uri_format(definition))
                try:
                    o = self.serialize_prop(definition, value)
                except MissingProperty:
                    raise

                namespace_manager.bind(definition.ns.prefix,
                                       Namespace(definition.ns.uri),
                                       override=False)
                if type(o) != list:
                    self.g.add((
                        s,
                        p,
                        o,
                    ))
                else:
                    for oo in o:
                        self.g.add((
                            s,
                            p,
                            oo,
                        ))
            elif definition.required and not model_object.skip_validation:
                raise RequiredProperty(
                    "Property '{}' is required for {}".format(
                        name, model_object.compact_uri()))
Example #30
def create_molecule_graph(uri_base, mol):
    mongochem = Namespace('%s/api/v1/molecules/' % uri_base)
    g = Graph()
    inchi = mol['inchi']
    name = mol['name']
    inchi_node = BNode()

    molecule = URIRef(mongochem[mol['_id']])

    namespace_manager = NamespaceManager(g)
    namespace_manager.bind('cheminf', cheminf, override=False)
    namespace_manager.bind('mongochem', mongochem, override=False)
    namespace_manager.bind('owl', OWL, override=False)

    g.add((molecule, OWL.subClassOf, cheminf.CHEMINF_000000))
    g.add((molecule, OWL.label, Literal(name.lower())))
    g.add((inchi_node, RDF.type, cheminf.CHEMINF_000113))
    g.add((inchi_node, cheminf.SIO_000300, Literal(inchi)))
    g.add((molecule, cheminf.CHEMINF_000200, inchi_node))

    return g.serialize()
Example #31
def write_towns_to_turtle(towns, countryTowns):
    g = Graph()
    namespace_manager = NamespaceManager(Graph())
    n_dbpedia_res = Namespace("http://dbpedia.org/resource/")
    n_dbo_res = Namespace("http://dbpedia.org/ontology/")
    n_custom_ontology = Namespace("http://www.semanticweb.org/sws/group4/ontology/")
    #n_custom_resources = Namespace("http://www.semanticweb.org/sws/group4/resources/")

    namespace_manager.bind('swo', n_custom_ontology, override=False)
    #namespace_manager.bind('sws', n_custom_resources, override=False)
    namespace_manager.bind('dbp', n_dbpedia_res, override=False)
    namespace_manager.bind('dbo', n_dbo_res, override=False)
    g.namespace_manager = namespace_manager
    
    # town = n_dbpedia_res[country_str.replace(" ", "_")]
    for iso, name, lat, lon, templ, temph, df, dt, mf, mt in towns:
        t = URIRef(n_dbpedia_res[name.replace(' ','_')])

        # add the town as a named individual
        g.add((t, RDF.type, n_dbo_res.Town))

        # add data props
        g.add((t, n_custom_ontology['townName'], Literal(name)))
        g.add((t, n_custom_ontology['latitude'], Literal(lat)))
        g.add((t, n_custom_ontology['longitude'], Literal(lon)))
        g.add((t, n_custom_ontology['tempTypicalLow'], Literal(templ)))
        g.add((t, n_custom_ontology['tempTypicalHigh'], Literal(temph)))
        g.add((t, n_custom_ontology['dayFrom'], Literal(df)))
        g.add((t, n_custom_ontology['dayTo'], Literal(dt)))
        g.add((t, n_custom_ontology['monthFrom'], Literal(mf)))
        g.add((t, n_custom_ontology['monthTo'], Literal(mt)))

    for country, town in countryTowns:
        if (country is not None and town is not None):
            c = URIRef(n_dbpedia_res[country.replace(" ", "_")])
            t = URIRef(n_dbpedia_res[town.replace(" ", "_")])
            g.add((c, n_custom_ontology['hasTown'], t))
    
    # write to output file
    g.serialize(destination=f'ttl/towns.ttl', format='turtle')
Example #32
def write_to_turtle_rdf(df, output_file):
    g = Graph()
    namespace_manager = NamespaceManager(Graph())
    n_geo = Namespace("http://sws.geonames.org/")
    n_custom_ont = Namespace(
        "http://vocab.informatik.tuwien.ac.at/VU184.729-2018/e01429253/ontology/"
    )
    n_custom_cls = Namespace(
        "http://vocab.informatik.tuwien.ac.at/VU184.729-2018/e01429253/class/")
    n_time = Namespace("http://www.w3.org/2006/time/")

    namespace_manager.bind('tuwo', n_custom_ont, override=False)
    namespace_manager.bind('tuwc', n_custom_cls, override=False)
    namespace_manager.bind('gn', n_geo, override=False)
    namespace_manager.bind('time', n_time, override=False)
    g.namespace_manager = namespace_manager

    # define properties
    movement_property = n_custom_ont['populationMovement']
    orig_country_property = n_custom_ont['countryOfOrigin']
    pop_type_property = n_custom_ont['populationType']
    year_property = n_time['year']
    pop_amount_property = n_custom_ont['peopleAmount']

    # add nodes to the graph
    for index, row in df.iterrows():
        # blank node for connection
        relation_node = BNode()  # a GUID is generated

        # base triple (residence_country, movement, blank_node)
        g.add((n_geo[str(int(row[0]))], movement_property, relation_node))

        # child properties
        g.add((relation_node, orig_country_property, n_geo[str(int(row[1]))]))
        g.add((relation_node, pop_type_property, n_custom_cls[row[2]]))
        g.add((relation_node, year_property, Literal(int(row[3]))))
        g.add((relation_node, pop_amount_property, Literal(int(row[4]))))

    # write to output file
    g.serialize(destination=output_file, format='turtle')
Example #33
def generate_authority_rdf(authority):
    g = Graph()

    auth = URIRef("http://data.isiscb.org/authority/" + authority.id) #urllib.quote(authority.name.replace(" ", "_")))

    type = get_auth_type(authority.type_controlled)
    if not type:
        return ''
    g.add( (auth, RDF.type, type) )
    g.add( (auth, RDF.type, madsrdf.Authority) )
    g.add( (auth, RDFS.label, Literal(authority.name)) )
    g.add( (auth, madsrdf.authoritativeLabel, Literal(authority.name)) )

    for attr in authority.attributes.all():
        attr_pred = get_property(attr.type_controlled.name)
        if attr_pred:
            g.add( (auth, attr_pred, Literal(attr.value_freeform)))

    nsMgr = NamespaceManager(g)
    nsMgr.bind('madsrdf', madsrdf)
    nsMgr.bind('isiscb', isisns)
    nsMgr.bind('isisvocab', isisns_props)
    return g.serialize(format='application/rdf+xml')
Example #34
def create_molecule_graph(uri_base, mol):
    mongochem = Namespace('%s/api/v1/molecules/' % uri_base)
    g = Graph()
    inchi = mol['inchi']
    name = mol.get('name')
    inchi_node = BNode()

    molecule = URIRef(mongochem[mol['_id']])

    namespace_manager = NamespaceManager(g)
    namespace_manager.bind('cheminf', cheminf, override=False)
    namespace_manager.bind('mongochem', mongochem, override=False)
    namespace_manager.bind('owl', OWL, override=False)

    g.add((molecule, OWL.subClassOf, cheminf.CHEMINF_000000))

    if name is not None:
        g.add((molecule, OWL.label, Literal(name.lower())))

    g.add((inchi_node, RDF.type, cheminf.CHEMINF_000113))
    g.add((inchi_node, cheminf.SIO_000300, Literal(inchi)))
    g.add((molecule, cheminf.CHEMINF_000200, inchi_node))

    return g.serialize()
Example #35
SKOS = Namespace('http://www.w3.org/2004/02/skos/core#')
# RDF namespace
RDF = Namespace('http://www.w3.org/1999/02/22-rdf-syntax-ns#')
# CiTO namespace
CITO = Namespace('http://purl.org/spar/cito/')
# RDFS namespace
RDFS = Namespace('http://www.w3.org/2000/01/rdf-schema#')
# The local namespace
VLOCAL = Namespace('http://connect.unavco.org/ontology/vlocal#')
# WGS84 namespace
WGS84 = Namespace('http://www.w3.org/2003/01/geo/wgs84_pos#')
# OWL namespace
OWL = Namespace('http://www.w3.org/2002/07/owl#')

VITROPUBLIC = Namespace('http://vitro.mannlib.cornell.edu/ns/vitro/public#')

ns_manager = NamespaceManager(Graph())
ns_manager.bind('d', D)
ns_manager.bind('vivo', VIVO)
ns_manager.bind('vcard', VCARD)
ns_manager.bind('obo', OBO)
ns_manager.bind('bibo', BIBO)
ns_manager.bind("foaf", FOAF)
ns_manager.bind("skos", SKOS)
ns_manager.bind("cito", CITO)
ns_manager.bind("rdfs", RDFS)
ns_manager.bind("vlocal", VLOCAL)
ns_manager.bind("wgs84", WGS84)
ns_manager.bind("vitropublic", VITROPUBLIC)
ns_manager.bind("owl", OWL)
Example #36
class ClaimsKGGenerator:
    def __init__(self,
                 model_uri,
                 sparql_wrapper=None,
                 threshold=0.3,
                 include_body: bool = False,
                 resolve: bool = True,
                 use_caching: bool = False):
        self._graph = rdflib.Graph()
        self.thesoz = SkosThesaurusMatcher(
            self._graph,
            thesaurus_path="claimskg/data/thesoz-komplett.xml",
            skos_xl_labels=True,
            prefix="http://lod.gesis.org/thesoz/")
        self._graph = self.thesoz.get_merged_graph()

        self.unesco = SkosThesaurusMatcher(
            self._graph,
            thesaurus_path="claimskg/data/unesco-thesaurus.xml",
            skos_xl_labels=False,
            prefix="http://vocabularies.unesco.org/thesaurus/")

        self._graph = self.unesco.get_merged_graph()

        self._graph.load("claimskg/data/dbpedia_categories_lang_en_skos.ttl",
                         format="turtle")

        self._sparql_wrapper = sparql_wrapper  # type: SPARQLWrapper
        self._uri_generator = ClaimsKGURIGenerator(model_uri)
        self._threshold = threshold
        self._include_body = include_body
        self._resolve = resolve
        self._use_caching = use_caching

        self.model_uri = model_uri
        self._namespace_manager = NamespaceManager(Graph())

        self._claimskg_prefix = rdflib.Namespace(model_uri)
        self._namespace_manager.bind('claimskg',
                                     self._claimskg_prefix,
                                     override=False)
        self._namespace_manager.bind('base',
                                     self._claimskg_prefix,
                                     override=True)

        self.counter = TypedCounter()

        self._rdfs_prefix = rdflib.Namespace(
            "http://www.w3.org/2000/01/rdf-schema#")
        self._namespace_manager.bind('rdfs', self._rdfs_prefix, override=False)

        self._schema_prefix = rdflib.Namespace("http://schema.org/")
        self._namespace_manager.bind('schema',
                                     self._schema_prefix,
                                     override=False)

        self._namespace_manager.bind('owl', OWL, override=True)

        self._dbo_prefix = rdflib.Namespace("http://dbpedia.org/ontology/")
        self._namespace_manager.bind("dbo", self._dbo_prefix, override=False)

        self._dbr_prefix = rdflib.Namespace("http://dbpedia.org/resource/")
        self._namespace_manager.bind("dbr", self._dbr_prefix, override=False)

        self._dbc_prefix = rdflib.Namespace(
            "http://dbpedia.org/resource/Category_")
        self._namespace_manager.bind("dbc", self._dbr_prefix, override=False)

        self._dcat_prefix = rdflib.Namespace("http://www.w3.org/ns/dcat#")
        self._namespace_manager.bind("dcat", self._dcat_prefix, override=False)

        self._dct_prefix = rdflib.Namespace("http://purl.org/dc/terms/")
        self._namespace_manager.bind("dct", self._dct_prefix, override=False)

        self._foaf_prefix = rdflib.Namespace("http://xmlns.com/foaf/0.1/")
        self._namespace_manager.bind("foaf", self._foaf_prefix, override=False)

        self._vcard_prefix = rdflib.Namespace(
            "http://www.w3.org/2006/vcard/ns#")
        self._namespace_manager.bind("vcard",
                                     self._vcard_prefix,
                                     override=False)

        self._adms_prefix = Namespace("http://www.w3.org/ns/adms#")
        self._namespace_manager.bind("adms", self._adms_prefix, override=False)

        self._skos_prefix = Namespace("http://www.w3.org/2004/02/skos/core#")
        self._namespace_manager.bind("skos", self._skos_prefix, override=False)

        self._owl_same_as = URIRef(OWL['sameAs'])

        self._schema_claim_review_class_uri = URIRef(
            self._schema_prefix['ClaimReview'])
        self._schema_creative_work_class_uri = URIRef(
            self._schema_prefix['CreativeWork'])
        self._schema_organization_class_uri = URIRef(
            self._schema_prefix['Organization'])
        self._schema_thing_class_uri = URIRef(self._schema_prefix['Thing'])
        self._schema_rating_class_uri = URIRef(self._schema_prefix['Rating'])
        self._schema_language_class_uri = URIRef(
            self._schema_prefix['Language'])

        self._schema_claim_reviewed_property_uri = URIRef(
            self._schema_prefix['claimReviewed'])
        self._schema_url_property_uri = URIRef(self._schema_prefix['url'])
        self._schema_name_property_uri = URIRef(self._schema_prefix['name'])
        self._schema_date_published_property_uri = URIRef(
            self._schema_prefix['datePublished'])
        self._schema_in_language_preperty_uri = URIRef(
            self._schema_prefix['inLanguage'])
        self._schema_author_property_uri = URIRef(
            self._schema_prefix['author'])
        self._schema_same_as_property_uri = URIRef(
            self._schema_prefix['sameAs'])
        self._schema_citation_preperty_uri = URIRef(
            self._schema_prefix['citation'])
        self._schema_item_reviewed_property_uri = URIRef(
            self._schema_prefix['itemReviewed'])
        self._schema_alternate_name_property_uri = URIRef(
            self._schema_prefix['alternateName'])
        self._schema_description_property_uri = URIRef(
            self._schema_prefix['description'])
        self._schema_rating_value_property_uri = URIRef(
            self._schema_prefix['ratingValue'])
        self._schema_mentions_property_uri = URIRef(
            self._schema_prefix['mentions'])
        self._schema_keywords_property_uri = URIRef(
            self._schema_prefix['keywords'])
        self._schema_headline_property_uri = URIRef(
            self._schema_prefix['headline'])
        self._schema_review_body_property_uri = URIRef(
            self._schema_prefix['reviewBody'])
        self._schema_text_property_uri = URIRef(self._schema_prefix['text'])

        self._iso1_language_tag = "en"
        self._iso3_language_tag = "eng"

        self._english_uri = URIRef(self._claimskg_prefix["language/English"])
        self._graph.add(
            (self._english_uri, RDF.type, self._schema_language_class_uri))
        self._graph.add(
            (self._english_uri, self._schema_alternate_name_property_uri,
             Literal(self._iso1_language_tag)))
        self._graph.add((self._english_uri, self._schema_name_property_uri,
                         Literal("English")))

        self._nif_prefix = rdflib.Namespace(
            "http://persistence.uni-leipzig.org/nlp2rdf/ontologies/nif-core#")
        self._namespace_manager.bind('nif', self._nif_prefix, override=False)

        self._nif_RFC5147String_class_uri = URIRef(
            self._nif_prefix['RFC5147String'])
        self._nif_context_class_uri = URIRef(self._nif_prefix['Context'])

        self._nif_source_url_property_uri = URIRef(
            self._nif_prefix['sourceUrl'])
        self._nif_begin_index_property_uri = URIRef(
            self._nif_prefix["beginIndex"])
        self._nif_end_index_property_uri = URIRef(self._nif_prefix["endIndex"])
        self._nif_is_string_property_uri = URIRef(self._nif_prefix["isString"])

        self._its_prefix = rdflib.Namespace(
            "https://www.w3.org/2005/11/its/rdf#")
        self._namespace_manager.bind('itsrdf',
                                     self._its_prefix,
                                     override=False)

        self.its_ta_confidence_property_uri = URIRef(
            self._its_prefix['taConfidence'])
        self.its_ta_ident_ref_property_uri = URIRef(
            self._its_prefix['taIdentRef'])

        self._logical_view_claims = []  # type: List[ClaimLogicalView]
        self._creative_works_index = []

        self.keyword_uri_set = set()

        self.global_statistics = ClaimsKGStatistics()
        self.per_source_statistics = {}

    def _create_schema_claim_review(self, row, claim: ClaimLogicalView):
        claim_review_instance = self._uri_generator.claim_review_uri(row)
        self._graph.add((claim_review_instance, RDF.type,
                         self._schema_claim_review_class_uri))

        # claim_reviewed_value = _normalize_text_fragment(_row_string_value(row, "claimReview_claimReviewed"))
        # self._graph.add(
        #     (claim_review_instance, self._schema_claim_reviewed_property_uri,
        #      Literal(claim_reviewed_value,
        #              lang=self._iso1_language_tag)))

        headline_value = _row_string_value(row, "extra_title")

        if len(headline_value) > 0:
            self._graph.add(
                (claim_review_instance, self._schema_headline_property_uri,
                 Literal(headline_value, lang=self._iso1_language_tag)))
            claim.text_fragments.append(headline_value)
            claim.has_headline = True

        # Include body only if the option is enabled

        body_value = _row_string_value(row, "extra_body")
        if len(body_value) > 0:
            claim.has_body_text = True
            claim.text_fragments.append(_normalize_text_fragment(body_value))
            if self._include_body:
                self._graph.add((claim_review_instance,
                                 self._schema_review_body_property_uri,
                                 Literal(body_value,
                                         lang=self._iso1_language_tag)))

        claim_review_url = row['claimReview_url']
        claim.claim_review_url = claim_review_url

        if claim_review_url is not None:
            self._graph.add(
                (claim_review_instance, self._schema_url_property_uri,
                 URIRef(row['claimReview_url'])))

        review_date = row['claimReview_datePublished']
        if review_date:
            self._graph.add((claim_review_instance,
                             self._schema_date_published_property_uri,
                             Literal(review_date, datatype=XSD.date)))
            claim.review_date = datetime.datetime.strptime(
                review_date, "%Y-%m-%d").date()
        self._graph.add(
            (claim_review_instance, self._schema_in_language_preperty_uri,
             self._english_uri))

        return claim_review_instance

    def _create_organization(self, row, claim):
        organization = self._uri_generator.organization_uri(row)
        self._graph.add(
            (organization, RDF.type, self._schema_organization_class_uri))

        claim.claimreview_author = row['claimReview_author_name']

        self._graph.add((organization, self._schema_name_property_uri,
                         Literal(row['claimReview_author_name'],
                                 lang=self._iso1_language_tag)))

        author_name = _row_string_value(row, 'claimReview_author_name')
        if len(author_name) > 0:
            self._graph.add((organization, self._schema_url_property_uri,
                             URIRef(source_uri_dict[author_name])))

        return organization

    def _create_claims_kg_organization(self):
        organization = self._uri_generator.claimskg_organization_uri()
        self._graph.add(
            (organization, RDF.type, self._schema_organization_class_uri))

        self._graph.add((organization, self._schema_name_property_uri,
                         Literal("ClaimsKG")))

        self._graph.add((organization, self._schema_url_property_uri,
                         URIRef(self.model_uri)))

    def _reconcile_keyword_annotations(self,
                                       claim,
                                       keyword_uri,
                                       keyword,
                                       matching_annotations,
                                       type="thesoz"):
        for annotation in matching_annotations:
            self._graph.add((keyword_uri, URIRef(self._dct_prefix["about"]),
                             URIRef(annotation[0])))
            if type == "thesoz":
                claim.keywords_thesoz.add(keyword)
            else:
                claim.keywords_unesco.add(keyword)

    def _reconcile_keyword_mention_with_annotations(self,
                                                    claim,
                                                    mention,
                                                    dbpedia_entity,
                                                    keyword,
                                                    matching_annotations,
                                                    type="thesoz"):
        start = mention['begin']
        end = mention['end']
        for matching_annotation in matching_annotations:
            if start == matching_annotation[2] and end == matching_annotation[
                    3]:
                if type == "thesoz":
                    claim.keywords_thesoz_dbpedia.add(keyword)
                elif type == "unesco":
                    claim.keywords_unesco_dbpedia.add(keyword)
                self._graph.add((URIRef(dbpedia_entity), OWL.sameAs,
                                 URIRef(matching_annotation[0])))

    def _create_creative_work(self, row, claim: ClaimLogicalView):
        creative_work = self._uri_generator.creative_work_uri(row)
        self._graph.add(
            (creative_work, RDF.type, self._schema_creative_work_class_uri))

        date_published_value = _row_string_value(row,
                                                 "creativeWork_datePublished")
        if len(date_published_value) > 0:
            self._graph.add(
                (creative_work, self._schema_date_published_property_uri,
                 Literal(date_published_value, datatype=XSD.date)))
            claim.claim_date = datetime.datetime.strptime(
                date_published_value, "%Y-%m-%d").date()

        keywords = row['extra_tags']
        if isinstance(keywords, str) and len(keywords) > 0:
            keyword_mentions = self._process_json(
                row['extra_entities_keywords'])
            if not keyword_mentions:
                keyword_mentions = []
            if ";" in keywords:
                keyword_list = keywords.split(";")
            else:
                keyword_list = keywords.split(",")

            for keyword in keyword_list:
                keyword = keyword.strip()
                keyword_uri = self._uri_generator.keyword_uri(keyword)
                if keyword_uri not in self.keyword_uri_set:
                    self._graph.add(
                        (keyword_uri, RDF.type, self._schema_thing_class_uri))
                    self._graph.add(
                        (keyword_uri, self._schema_name_property_uri,
                         Literal(keyword, lang=self._iso1_language_tag)))
                    thesoz_matching_annotations = self.thesoz.find_keyword_matches(
                        keyword)
                    unesco_matching_annotations = self.unesco.find_keyword_matches(
                        keyword)
                    self._reconcile_keyword_annotations(
                        claim, keyword_uri, keyword,
                        thesoz_matching_annotations)
                    self._reconcile_keyword_annotations(
                        claim,
                        keyword_uri,
                        keyword,
                        unesco_matching_annotations,
                        type="unesco")
                    for mention in keyword_mentions:
                        if keyword.lower().strip() in mention['text'].lower(
                        ).strip():
                            self.keyword_uri_set.add(keyword_uri)
                            mention_instance, dbpedia_entity = self._create_mention(
                                mention, claim, False)
                            if mention_instance:
                                claim.keywords_dbpedia.add(keyword)
                                self._graph.add(
                                    (keyword_uri,
                                     self._schema_mentions_property_uri,
                                     mention_instance))

                                self._reconcile_keyword_mention_with_annotations(
                                    claim, mention, dbpedia_entity, keyword,
                                    thesoz_matching_annotations)
                                self._reconcile_keyword_mention_with_annotations(
                                    claim,
                                    mention,
                                    dbpedia_entity,
                                    keyword,
                                    unesco_matching_annotations,
                                    type="unesco")
                claim.keywords.add(keyword.strip())

                self._graph.add(
                    (creative_work, self._schema_keywords_property_uri,
                     keyword_uri))

        links = row['extra_refered_links']
        author_url = _row_string_value(row, 'claimReview_author_url')
        if links:
            links = links[1:-1].split(",")
            for link in links:
                stripped_link = link.strip()
                if (len(stripped_link) > 0 and stripped_link[0] != "#"
                        and re.match(_is_valid_url_regex, stripped_link)
                        and stripped_link != source_uri_dict[author_url]):
                    # Strip escape characters and stray HTML fragments that
                    # occasionally leak into the scraped link field.
                    link = (stripped_link
                            .replace("\\", "")
                            .replace("%20TARGET=prayer>adultery</A>%20was%20made%20public.%20</p>%0A", "")
                            .replace("\"", "")
                            .replace("<img%20src=?", "")
                            .replace(">", "")
                            .replace("</", "")
                            .replace("<", ""))

                    parsed_url = urlparse(link)
                    is_correct = (all([
                        parsed_url.scheme, parsed_url.netloc, parsed_url.path
                    ]) and len(parsed_url.netloc.split(".")) > 1
                                  and "<img" not in link)
                    if is_correct:
                        claim.links.append(link)
                        # Percent-encode the characters rdflib rejects in URIRefs.
                        escaped_query = (parsed_url.query
                                         .replace("|", "%7C")
                                         .replace("^", "%5E")
                                         .replace("\\", "%5C")
                                         .replace("{", "%7B")
                                         .replace("}", "%7D")
                                         .replace("&", "%26")
                                         .replace("=", "%3D"))
                        self._graph.add(
                            (creative_work, self._schema_citation_preperty_uri,
                             URIRef(parsed_url.scheme + "://" +
                                    parsed_url.netloc + parsed_url.path + "?" +
                                    escaped_query)))
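
        # Note: the .replace chain above percent-encodes only a fixed set of
        # reserved characters.  A more general alternative (a sketch, assuming
        # full RFC 3986 escaping of the query string is acceptable here)
        # would delegate to the standard library:
        #
        #   from urllib.parse import quote
        #   escaped_query = quote(parsed_url.query, safe="")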
        # Creative work author instantiation

        author_value = _row_string_value(row, "creativeWork_author_name")
        claim.creative_work_author = author_value

        claim_reviewed_value = _normalize_text_fragment(
            _row_string_value(row, "claimReview_claimReviewed"))
        claim.title = claim_reviewed_value
        self._graph.add((creative_work, self._schema_text_property_uri,
                         Literal(claim_reviewed_value,
                                 lang=self._iso1_language_tag)))

        if len(author_value) > 0:
            creative_work_author = self._uri_generator.creative_work_author_uri(
                row)
            self._graph.add(
                (creative_work_author, RDF.type, self._schema_thing_class_uri))

            author_mentions = self._process_json(row['extra_entities_author'])
            if not author_mentions:
                author_mentions = []
            for mention in author_mentions:
                entity_uri = mention['entity'].replace(" ", "_")
                mention_instance = self._dbr_prefix[entity_uri]
                if mention_instance:
                    self._graph.add(
                        (creative_work_author,
                         self._schema_mentions_property_uri, mention_instance))

            self._graph.add(
                (creative_work_author, self._schema_name_property_uri,
                 Literal(author_value, lang=self._iso1_language_tag)))
            self._graph.add((creative_work, self._schema_author_property_uri,
                             creative_work_author))

        # Todo: Reconcile author entities with DBPedia
        # self._graph.add((creative_work_author, self._schema_same_as_property_uri, Literal("dbpedia:link")))
        self._creative_works_index.append(creative_work)
        return creative_work

    def _create_review_rating(self, row, claim):

        original_rating = self._uri_generator.create_original_rating_uri(row)

        rating_alternate_name = row['rating_alternateName']
        if rating_alternate_name:
            escaped_alternate_rating_name = html.escape(
                row['rating_alternateName']).encode('ascii',
                                                    'xmlcharrefreplace')
            self._graph.add(
                (original_rating, self._schema_alternate_name_property_uri,
                 Literal(escaped_alternate_rating_name)))

        self._graph.add(
            (original_rating, RDF.type, self._schema_rating_class_uri))

        rating_value = row['rating_ratingValue'].replace("[", "").replace(
            "]", "").replace("'", "").replace(",", "").strip()

        if rating_value and len(rating_value) > 0:
            value = float(rating_value)
            self._graph.add(
                (original_rating, self._schema_rating_value_property_uri,
                 Literal(value, datatype=XSD.float)))

        organization = self._uri_generator.organization_uri(row)
        self._graph.add(
            (original_rating, self._schema_author_property_uri, organization))

        normalized_rating_enum = ratings.normalize(
            _row_string_value(row, "claimReview_author_name").lower(),
            _row_string_value(row, "rating_alternateName").lower())
        claim.normalized_rating = normalized_rating_enum.name
        normalized_rating = self._uri_generator.create_normalized_rating_uri(
            normalized_rating_enum)
        self._graph.add(
            (normalized_rating, RDF.type, self._schema_rating_class_uri))
        self._graph.add(
            (normalized_rating, self._schema_alternate_name_property_uri,
             Literal(str(normalized_rating_enum.name),
                     lang=self._iso1_language_tag)))

        self._graph.add(
            (normalized_rating, self._schema_rating_value_property_uri,
             Literal(normalized_rating_enum.value, datatype=XSD.integer)))

        claimskg_org = self._uri_generator.claimskg_organization_uri()
        self._graph.add((normalized_rating, self._schema_author_property_uri,
                         claimskg_org))

        return original_rating, normalized_rating
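
    # ratings.normalize above maps each fact-checking site's local rating
    # labels onto a shared enumeration exposing .name and .value.  A minimal
    # sketch of such an enum (an assumption for illustration, not the
    # project's actual implementation):
    #
    #   from enum import Enum
    #
    #   class NormalizedRating(Enum):
    #       OTHER = -1
    #       FALSE = 1
    #       MIXTURE = 2
    #       TRUE = 3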

    def _create_mention(self, mention_entry, claim: ClaimLogicalView,
                        in_review):
        rho_value = float(mention_entry['score'])
        if rho_value > self._threshold:

            text = mention_entry['text']
            start = mention_entry['begin']
            end = mention_entry['end']
            entity_uri = mention_entry['entity'].replace(" ", "_")
            categories = mention_entry['categories']
            if len(categories) > 0:
                categories = categories[0].split(",")

            mention = self._uri_generator.mention_uri(
                start, end, text, entity_uri, rho_value,
                ",".join(claim.text_fragments))

            self._graph.add((mention, RDF.type, self._nif_context_class_uri))
            self._graph.add(
                (mention, RDF.type, self._nif_RFC5147String_class_uri))

            self._graph.add((mention, self._nif_is_string_property_uri,
                             Literal(text, lang=self._iso1_language_tag)))
            self._graph.add((mention, self._nif_begin_index_property_uri,
                             Literal(int(start), datatype=XSD.integer)))
            self._graph.add((mention, self._nif_end_index_property_uri,
                             Literal(int(end), datatype=XSD.integer)))

            # TODO: Fix values so that they aren't displayed in scientific notation
            self._graph.add(
                (mention, self.its_ta_confidence_property_uri,
                 Literal(float(self._format_confidence_score(mention_entry)),
                         datatype=XSD.float)))

            self._graph.add((mention, self.its_ta_ident_ref_property_uri,
                             self._dbr_prefix[entity_uri]))
            if in_review:
                claim.review_entities.append(entity_uri)
                for category in categories:
                    claim.review_entity_categories.append(category)
            else:
                claim.claim_entities.append(entity_uri)

                for category in categories:
                    claim.claim_entity_categories.append(category)

            for category in categories:
                category = category.replace(" ", "_")
                self._graph.add((mention, URIRef(self._dct_prefix["about"]),
                                 URIRef(self._dbc_prefix[category])))

            return mention, self._dbr_prefix[entity_uri]
        else:
            return None, None

    @staticmethod
    def _format_confidence_score(mention_entry):
        value = float(mention_entry['score'])
        rounded_to_two_decimals = round(value, 2)
        return str(rounded_to_two_decimals)
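
    # Regarding the scientific-notation TODO above: one possible fix (a
    # sketch, not the project's code) is to build the literal from a
    # fixed-point decimal so rdflib never emits an exponent form:
    #
    #   from decimal import Decimal
    #   Literal(Decimal(_format_confidence_score(mention_entry)),
    #           datatype=XSD.decimal)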

    def create_contact_vcard(self):
        atchechmedjiev_contact_vcard = URIRef(
            self._claimskg_prefix['atchechmedjiev_contact_vcard'])
        self._graph.add((atchechmedjiev_contact_vcard, RDF.type,
                         URIRef(self._vcard_prefix['Individual'])))
        self._graph.add(
            (atchechmedjiev_contact_vcard, self._vcard_prefix['hasEmail'],
             URIRef("mailto:[email protected]")))
        self._graph.add(
            (atchechmedjiev_contact_vcard, self._vcard_prefix['fn'],
             Literal("Andon Tchechmedjiev")))

        return atchechmedjiev_contact_vcard

    def add_dcat_metadata(self):
        claimskg = rdflib.term.URIRef(self._claimskg_prefix['claimskg'])
        self._graph.add((claimskg, RDF.type,
                         rdflib.term.URIRef(self._dcat_prefix['Dataset'])))
        self._graph.add(
            (claimskg, rdflib.term.URIRef(self._dct_prefix['title']),
             Literal("ClaimsKG")))
        self._graph.add((
            claimskg, rdflib.term.URIRef(self._dct_prefix['description']),
            Literal("ClaimsKG: A Live Knowledge Graph of Fact-Checked Claims")))

        self._graph.add(
            (claimskg, rdflib.term.URIRef(self._dct_prefix['issued']),
             rdflib.term.Literal("2019-04-10", datatype=XSD.date)))

        self._graph.add(
            (claimskg, rdflib.term.URIRef(self._dct_prefix['modified']),
             rdflib.term.Literal(datetime.datetime.now().date(),
                                 datatype=XSD.date)))

        doi_org = URIRef(self._claimskg_prefix['doi_org_instance'])
        self._graph.add(
            (doi_org, RDF.type, URIRef(self._foaf_prefix['Organization'])))
        self._graph.add(
            (doi_org, RDFS.label, Literal("International DOI Foundation")))
        self._graph.add((doi_org, self._foaf_prefix['homepage'],
                         URIRef("https://www.doi.org/")))

        identifier = URIRef(self._claimskg_prefix['doi_identifier'])
        self._graph.add(
            (identifier, RDF.type, self._adms_prefix['Identifier']))
        self._graph.add((identifier, self._skos_prefix['notation'],
                         URIRef("https://doi.org/10.5281/zenodo.2628745")))
        self._graph.add((identifier, self._adms_prefix['schemaAgency'],
                         Literal("International DOI Foundation")))
        self._graph.add((identifier, self._dct_prefix['creator'], doi_org))

        self._graph.add(
            (claimskg, rdflib.term.URIRef(self._dct_prefix['identifier']),
             rdflib.term.Literal("10.5281/zenodo.2628745")))

        self._graph.add(
            (claimskg, rdflib.term.URIRef(self._dct_prefix['language']),
             rdflib.term.URIRef("http://id.loc.gov/vocabulary/iso639-1/en")))

        self._graph.add(
            (claimskg,
             rdflib.term.URIRef(self._dct_prefix['accrualPeriodicity']),
             URIRef("http://purl.org/linked-data/sdmx/2009/code#freq-M")))

        self._graph.add(
            (claimskg, rdflib.term.URIRef(self._dcat_prefix['keyword']),
             Literal("Claims")))
        self._graph.add(
            (claimskg, rdflib.term.URIRef(self._dcat_prefix['keyword']),
             Literal("Facts")))
        self._graph.add(
            (claimskg, rdflib.term.URIRef(self._dcat_prefix['keyword']),
             Literal("Fact-checking")))
        self._graph.add(
            (claimskg, rdflib.term.URIRef(self._dcat_prefix['keyword']),
             Literal("Knowledge Graphs")))

        self._graph.add(
            (claimskg, rdflib.term.URIRef(self._dcat_prefix['contactPoint']),
             self.create_contact_vcard()))

        # SPARQL Distribution
        sparql_claimskg_distribution = URIRef(
            self._claimskg_prefix['sparql_claimskg_distribution'])
        self._graph.add((sparql_claimskg_distribution, RDF.type,
                         self._dcat_prefix['Distribution']))
        self._graph.add(
            (sparql_claimskg_distribution, self._dct_prefix['title'],
             Literal("SPARQL endpoint")))
        self._graph.add(
            (sparql_claimskg_distribution, self._dct_prefix['description'],
             Literal("The ClaimsKG SPARQL endpoint")))

        self._graph.add((sparql_claimskg_distribution,
                         rdflib.term.URIRef(self._dct_prefix['issued']),
                         rdflib.term.Literal("2019-04-10", datatype=XSD.date)))

        self._graph.add((sparql_claimskg_distribution,
                         rdflib.term.URIRef(self._dct_prefix['modified']),
                         rdflib.term.Literal(datetime.datetime.now().date(),
                                             datatype=XSD.date)))

        licence_document = URIRef(
            "https://creativecommons.org/licenses/by/4.0/")
        self._graph.add(
            (licence_document, RDF.type, self._dct_prefix['LicenseDocument']))

        self._graph.add((sparql_claimskg_distribution,
                         rdflib.term.URIRef(self._dct_prefix['license']),
                         licence_document))

        self._graph.add((sparql_claimskg_distribution,
                         rdflib.term.URIRef(self._dcat_prefix['accessURL']),
                         Literal("https://data.gesis.org/claimskg/sparql")))

        # Source code distribution
        sourcecode_claimskg_distribution = URIRef(
            self._claimskg_prefix['sourcecode_claimskg_distribution'])
        self._graph.add((sourcecode_claimskg_distribution, RDF.type,
                         self._dcat_prefix['Distribution']))
        self._graph.add(
            (sourcecode_claimskg_distribution, self._dct_prefix['title'],
             Literal("SPARQL endpoint")))
        self._graph.add(
            (sourcecode_claimskg_distribution, self._dct_prefix['description'],
             Literal("The ClaimsKG Github repository group")))

        self._graph.add((sourcecode_claimskg_distribution,
                         rdflib.term.URIRef(self._dct_prefix['issued']),
                         rdflib.term.Literal("2019-04-10", datatype=XSD.date)))

        self._graph.add((sourcecode_claimskg_distribution,
                         rdflib.term.URIRef(self._dct_prefix['modified']),
                         rdflib.term.Literal(datetime.datetime.now().date(),
                                             datatype=XSD.date)))

        self._graph.add((sourcecode_claimskg_distribution,
                         rdflib.term.URIRef(self._dct_prefix['license']),
                         licence_document))

        self._graph.add((sourcecode_claimskg_distribution,
                         rdflib.term.URIRef(self._dcat_prefix['accessURL']),
                         Literal("https://github.com/claimskg")))

    def generate_model(self, dataset_rows):
        row_counter = 0

        self._graph.namespace_manager = self._namespace_manager
        total_entry_count = len(dataset_rows)

        self.add_dcat_metadata()

        progress_bar = tqdm(total=total_entry_count)

        for row in dataset_rows:
            row_counter += 1
            progress_bar.update(1)

            logical_claim = ClaimLogicalView(
            )  # Instance holding claim raw information for mapping generation
            source_site = _row_string_value(row, 'claimReview_author_name')
            if source_site not in self.per_source_statistics.keys():
                self.per_source_statistics[source_site] = ClaimsKGStatistics()

            claim_review_instance = self._create_schema_claim_review(
                row, logical_claim)

            organization = self._create_organization(row, logical_claim)
            self._graph.add((claim_review_instance,
                             self._schema_author_property_uri, organization))

            creative_work = self._create_creative_work(row, logical_claim)
            self._graph.add(
                (claim_review_instance,
                 self._schema_item_reviewed_property_uri, creative_work))
            logical_claim.creative_work_uri = creative_work

            original, normalized = self._create_review_rating(
                row, logical_claim)
            self._graph.add(
                (claim_review_instance,
                 rdflib.term.URIRef(self._schema_prefix['reviewRating']),
                 original))
            self._graph.add(
                (claim_review_instance,
                 rdflib.term.URIRef(self._schema_prefix['reviewRating']),
                 normalized))

            # For claim review mentions
            entities_json = row[
                'extra_entities_claimReview_claimReviewed']  # type: str
            loaded_json = self._process_json(entities_json)
            if loaded_json:
                for mention_entry in loaded_json:
                    mention, dbpedia_entity = self._create_mention(
                        mention_entry, logical_claim, True)
                    if mention:
                        self._graph.add(
                            (creative_work, self._schema_mentions_property_uri,
                             mention))

            # For Creative Work mentions
            body_entities_json = row['extra_entities_body']
            loaded_body_json = self._process_json(body_entities_json)
            if loaded_body_json:
                for mention_entry in loaded_body_json:
                    mention, dbpedia_entity = self._create_mention(
                        mention_entry, logical_claim, False)
                    if mention:
                        self._graph.add(
                            (claim_review_instance,
                             self._schema_mentions_property_uri, mention))

            self._logical_view_claims.append(logical_claim)
            self.global_statistics.compute_stats_for_review(logical_claim)
            self.per_source_statistics[source_site].compute_stats_for_review(
                logical_claim)

        progress_bar.close()

    def _process_json(self, json_string):
        loaded_json = []
        if json_string:
            json_string = re.sub("\",\"\"", ",\"", json_string)
            json_string = re.sub('"\n\t\"', "", json_string)
            json_string = re.sub('}\]\[\]', '}]', json_string)

            if json_string == "[[][]]":
                loaded_json = []
            else:
                try:
                    loaded_json = json.loads(json_string)
                except ValueError:
                    loaded_json = None
        return loaded_json
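
    # Example of the cleanup above on a hypothetical malformed payload: a
    # stray '[]' glued to the end of an entity list is stripped before
    # json.loads runs, e.g.
    #
    #   _process_json('[{"text": "Obama"}][]')  ->  [{'text': 'Obama'}]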

    def export_rdf(self, format):

        print("\nGlobal dataset statistics")
        self.global_statistics.output_stats()

        print("\nPer source site statistics")

        for site in self.per_source_statistics.keys():
            print("\n\n{site} statistics...".format(site=site))
            self.per_source_statistics[site].output_stats()
        graph_serialization = self._graph.serialize(format=format,
                                                    encoding='utf-8')
        return graph_serialization

    def reconcile_claims(self,
                         embeddings,
                         theta,
                         keyword_weight,
                         link_weight,
                         text_weight,
                         entity_weight,
                         mappings_file_path=None,
                         seed=None,
                         samples=None):
        reconciler = FactReconciler(embeddings,
                                    self._use_caching,
                                    mappings_file_path,
                                    self._logical_view_claims,
                                    theta,
                                    keyword_weight,
                                    link_weight,
                                    text_weight,
                                    entity_weight,
                                    seed=seed,
                                    samples=samples)
        mappings = reconciler.generate_mappings()

        for mapping in mappings:
            if mapping is not None and mapping[
                    1] is not None and mapping[1] != (None, None):
                source = mapping[1][0]
                target = mapping[1][1]
                self._graph.add((source.creative_work_uri, OWL.sameAs,
                                 target.creative_work_uri))

    def materialize_indirect_claim_links(self):
        # Placeholder: builds a networkx view of the graph but does not yet
        # materialize indirect links between claims.
        mdg = rdflib_to_networkx_multidigraph(self._graph)

    def align_duplicated(self):
        count = len(self._logical_view_claims)
        total = int(count * (count - 1) / 2)
        result = [
            pair for pair in tqdm(itertools.combinations(range(count), 2),
                                  total=total)
            if self.compare_claim_titles(self._logical_view_claims[pair[0]],
                                         self._logical_view_claims[pair[1]])
        ]

        for pair in result:
            self._graph.add(
                (self._creative_works_index[pair[0]], self._owl_same_as,
                 self._creative_works_index[pair[1]]))

            self.global_statistics.count_mapping()
            self.per_source_statistics[self._logical_view_claims[
                pair[0]].claimreview_author].count_mapping()

    def compare_claim_titles(self, claim_a, claim_b):
        return self._normalize_label(claim_a.title) == self._normalize_label(
            claim_b.title)

    def _normalize_label(self, label):
        return label.strip().lower().replace("\"", "").replace("'", "")
Example #37
    
    queries = []
    generate_queries(data, queries, str(NS['sp'][model]))
    return queries


# Query builder state variables
main_types = []
data = {}
loaded = False

# Initialize the namespace manager object
namespace_manager = NamespaceManager(Graph())

# Import the namespaces into the namespace manager
for ns in NS.keys():
    namespace_manager.bind(ns, NS[ns], override=False)
    
# Parse the ontology when necessary
if not rdf_ontology.api_types:
    rdf_ontology.parse_ontology(open(ONTOLOGY_PATH).read())

# Build a list of data types that need to be added to the data definitions
for t in rdf_ontology.api_types:
    if t.is_statement or len(t.calls) > 0 or rdf_ontology.sp.Component in [x.uri for x in t.parents]:
        main_types.append(t)

# Build the data definitions object with each data type
for t in main_types: 
    generate_data_for_type(t, data)
Example #38
def create_graph():

    import os
    import pymysql as mdb

    import rdflib
    from rdflib import URIRef, Literal, Namespace, Graph
    from rdflib.namespace import NamespaceManager

    con = mdb.connect(mysql_conf['host'], mysql_conf['username'],
                      mysql_conf['password'], mysql_conf['db'])
    conn = con.cursor(mdb.cursors.DictCursor)

    g = Graph()
    namespace_manager = NamespaceManager(g)

    RDF = Namespace('rdf:')
    namespace_manager.bind('RDF', RDF, override=False)

    FOAF = Namespace('foaf:')
    namespace_manager.bind('foaf', FOAF, override=False)

    WB = Namespace('wb:')
    namespace_manager.bind('wb', WB, override=False)

    print("Dump users")
    conn.execute("SELECT * FROM user;")
    for user in conn.fetchall():

        uid = URIRef(user['uid'])
        g.add((uid, RDF.type, FOAF.Person))

        for k, v in user.items():
            if k in ['uid']: continue
            g.add((uid, FOAF[k], Literal(v)))

    print("Dump user relations")
    conn.execute("SELECT * FROM userrelation;")
    for user_rel in conn.fetchall():

        suid = URIRef(user_rel['suid'])
        tuid = URIRef(user_rel['tuid'])
        g.add((suid, FOAF.knows, tuid))

    print("Dump weibo")
    conn.execute("SELECT * FROM weibo;")
    for weibo in conn.fetchall():

        uid = URIRef(weibo['uid'])
        mid = URIRef(weibo['mid'])
        g.add((mid, RDF.type, WB.Post))
        g.add((uid, FOAF.posted, mid))

        for k, v in weibo.items():
            if k in ['uid', 'mid']: continue
            g.add((mid, WB[k], Literal(v)))

    print("Dump weibo relations")
    conn.execute("SELECT * FROM weiborelation;")
    for weibo_rel in conn.fetchall():

        smid = URIRef(weibo_rel['smid'])
        tmid = URIRef(weibo_rel['tmid'])
        g.add((smid, WB.shared, tmid))

    g.serialize(destination=os.path.join(path, "data/weibo.nt"), format='nt')
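
# A hypothetical invocation (assuming the module-level mysql_conf and path
# globals used above are configured elsewhere in the file):
#
#   create_graph()  # dumps users, relations and posts to data/weibo.nt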
Example #40
# Host config
HOSTNAME = socket.gethostname()

# S3 config
os.environ["AWS_SHARED_CREDENTIALS_FILE"] = "~/.aws/credentials"
ARCHIVE_BUCKET = "archive.tbrc.org"
OCR_OUTPUT_BUCKET = "ocr.bdrc.io"
S3 = boto3.resource("s3")
S3_client = boto3.client("s3")
archive_bucket = S3.Bucket(ARCHIVE_BUCKET)
ocr_output_bucket = S3.Bucket(OCR_OUTPUT_BUCKET)

# URI config
BDR = Namespace("http://purl.bdrc.io/resource/")
NSM = NamespaceManager(rdflib.Graph())
NSM.bind("bdr", BDR)

# s3 bucket directory config
SERVICE = "vision"
BATCH_PREFIX = "batch"
IMAGES = "images"
OUTPUT = "output"
INFO_FN = "info.json"

# local directory config
DATA_PATH = Path("./archive")
IMAGES_BASE_DIR = DATA_PATH / IMAGES
OCR_BASE_DIR = DATA_PATH / OUTPUT
CHECK_POINT_FN = DATA_PATH / "checkpoint.json"

# Checkpoint config
Example #41
from rdflib import Graph
from rdflib.namespace import (Namespace, ClosedNamespace, NamespaceManager,
                              RDF, RDFS, OWL, XSD)

BIBO = Namespace('http://purl.org/ontology/bibo/')
FOAF = Namespace('http://xmlns.com/foaf/0.1/')
SKOS = Namespace('http://www.w3.org/2004/02/skos/core#')
VCARD = Namespace('http://www.w3.org/2006/vcard/ns#')
OBO = Namespace('http://purl.obolibrary.org/obo/')

CONVERIS = Namespace('http://localhost/ontology/converis#')

FHP = Namespace('http://vivo.fredhutch.org/ontology/publications#')
FHD = Namespace('http://vivo.fredhutch.org/ontology/display#')

# temporary namespace for in-memory graphs
TMP = Namespace('http://localhost/tmp#')

namespaces = {}
for k, o in list(vars().items()):
    if isinstance(o, (Namespace, ClosedNamespace)):
        namespaces[k] = o

ns_mgr = NamespaceManager(Graph())
for k, v in namespaces.items():
    ns_mgr.bind(k.lower(), v)

rq_prefixes = u"\n".join("prefix %s: <%s>" % (k.lower(), v)
                         for k, v in namespaces.items())

prefixes = u"\n    ".join("%s: %s" % (k.lower(), v)
                          for k, v in namespaces.items()
                          if k not in u'RDF RDFS OWL XSD')
#namespace setup complete
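
# A short usage sketch (an assumption, not part of the original module):
# rq_prefixes is meant to be prepended to SPARQL query bodies so every
# query shares one prefix block.
if __name__ == '__main__':
    demo_query = rq_prefixes + u"""
    SELECT ?pub WHERE { ?pub a bibo:Document . }
    """
    for row in Graph().query(demo_query):
        print(row)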
Example #42
from django.core.exceptions import ValidationError
from rdflib import Graph, BNode
from rdflib.collection import Collection
from rdflib.namespace import Namespace, NamespaceManager, DC, DCTERMS, RDF, RDFS
from rdflib.plugin import register
from rdflib.plugins.serializers.rdfxml import XMLLANG, OWL_NS, XMLBASE
from rdflib.plugins.serializers.xmlwriter import XMLWriter
from rdflib.serializer import Serializer
from rdflib.term import Literal, URIRef
from rdflib.util import first

HSTERMS = Namespace("https://www.hydroshare.org/terms/")
RDFS1 = Namespace("http://www.w3.org/2000/01/rdf-schema#")

NAMESPACE_MANAGER = NamespaceManager(Graph())
NAMESPACE_MANAGER.bind('hsterms', HSTERMS, override=False)
NAMESPACE_MANAGER.bind("rdfs1", RDFS1, override=False)
NAMESPACE_MANAGER.bind('dc', DC, override=False)
NAMESPACE_MANAGER.bind('dcterms', DCTERMS, override=False)


class RDF_MetaData_Mixin(object):
    """
    A mixin for MetaData objects which store their metadata in generic relations.  If metadata outside of generic
    relations need to be used, you may extend ingest_metadata and get_rdf_graph to include the other metadata elements
    """

    def rdf_subject(self):
        raise NotImplementedError("RDF_Metadata_Mixin implementations must implement rdf_subject")

    def rdf_metadata_subject(self):
Example #43
def main():
    from optparse import OptionParser
    op = OptionParser(
        'usage: %prog [options] factFile1 factFile2 ... factFileN')

    op.add_option(
        '--why',
        default=None,
        help='Specifies the goals to solve for using the non-naive methods; ' +
        'see --method')

    op.add_option(
        '--closure',
        action='store_true',
        default=False,
        help='Whether or not to serialize the inferred triples' +
        ' along with the original triples.  Otherwise ' +
        '(the default behavior), serialize only the inferred triples')

    op.add_option(
        '--imports',
        action='store_true',
        default=False,
        help='Whether or not to follow owl:imports in the fact graph')

    op.add_option(
        '--output',
        default='n3',
        metavar='RDF_FORMAT',
        choices=[
            'xml', 'TriX', 'n3', 'pml', 'proof-graph', 'nt', 'rif', 'rif-xml',
            'conflict', 'man-owl'
        ],
        help=
        "Serialize the inferred triples and/or original RDF triples to STDOUT "
        +
        "using the specified RDF syntax ('xml', 'pretty-xml', 'nt', 'turtle', "
        +
        "or 'n3') or to print a summary of the conflict set (from the RETE " +
        "network) if the value of this option is 'conflict'.  If the the " +
        " value is 'rif' or 'rif-xml', Then the rules used for inference " +
        "will be serialized as RIF.  If the value is 'pml' and --why is used, "
        + " then the PML RDF statements are serialized.  If output is " +
        "'proof-graph then a graphviz .dot file of the proof graph is printed. "
        +
        "Finally if the value is 'man-owl', then the RDF facts are assumed " +
        "to be OWL/RDF and serialized via Manchester OWL syntax. The default is %default"
    )

    op.add_option(
        '--class',
        dest='classes',
        action='append',
        default=[],
        metavar='QNAME',
        help='Used with --output=man-owl to determine which ' +
        'classes within the entire OWL/RDF are targeted for serialization' +
        '.  Can be used more than once')

    op.add_option(
        '--hybrid',
        action='store_true',
        default=False,
        help='Used with --method=bfp to determine whether or not to ' +
        'peek into the fact graph to identify predicates that are both ' +
        'derived and base.  This is expensive for large fact graphs ' +
        'and is explicitly not used against SPARQL endpoints')

    op.add_option(
        '--property',
        action='append',
        dest='properties',
        default=[],
        metavar='QNAME',
        help='Used with --output=man-owl or --extract to determine which ' +
        'properties are serialized / extracted.  Can be used more than once')

    op.add_option(
        '--normalize',
        action='store_true',
        default=False,
        help=
        "Used with --output=man-owl to attempt to determine if the ontology is 'normalized' [Rector, A. 2003]. "
        + "The default is %default")

    op.add_option(
        '--ddlGraph',
        default=False,
        help=
        "The location of a N3 Data Description document describing the IDB predicates"
    )

    op.add_option(
        '--input-format',
        default='xml',
        dest='inputFormat',
        metavar='RDF_FORMAT',
        choices=['xml', 'trix', 'n3', 'nt', 'rdfa'],
        help=
        "The format of the RDF document(s) which serve as the initial facts " +
        " for the RETE network. One of 'xml', 'n3', 'trix', 'nt', " +
        "or 'rdfa'.  The default is %default")

    op.add_option(
        '--safety',
        default='none',
        metavar='RULE_SAFETY',
        choices=['loose', 'strict', 'none'],
        help="Determines how to handle RIF Core safety.  A value of 'loose' " +
        " means that unsafe rules will be ignored.  A value of 'strict' " +
        " will cause a syntax exception upon any unsafe rule.  A value of " +
        "'none' (the default) does nothing")

    op.add_option(
        '--pDSemantics',
        action='store_true',
        default=False,
        help=
        'Used with --dlp to add the pD semantics ruleset for semantics not covered '
        + 'by DLP but expressible in definite Datalog Logic Programming.' +
        ' The default is %default')

    op.add_option(
        '--stdin',
        action='store_true',
        default=False,
        help=
        'Parse STDIN as an RDF graph to contribute to the initial facts. The default is %default '
    )

    op.add_option(
        '--ns',
        action='append',
        default=[],
        metavar="PREFIX=URI",
        help='Register a namespace binding (QName prefix to a base URI).  This '
        + 'can be used more than once')

    op.add_option(
        '--rules',
        default=[],
        action='append',
        metavar='PATH_OR_URI',
        help='The Notation 3 documents to use as rulesets for the RETE network'
        + '.  Can be specified more than once')

    op.add_option('-d',
                  '--debug',
                  action='store_true',
                  default=True,
                  help='Include debugging output')

    op.add_option(
        '--strictness',
        default='defaultBase',
        metavar='DDL_STRICTNESS',
        choices=['loose', 'defaultBase', 'defaultDerived', 'harsh'],
        help=
        'Used with --why to specify whether to: *not* check if predicates are '
        +
        ' both derived and base (loose), if they are, mark as derived (defaultDerived) '
        +
        'or as base (defaultBase) predicates, else raise an exception (harsh)')

    op.add_option(
        '--method',
        default='naive',
        metavar='reasoning algorithm',
        choices=['gms', 'bfp', 'naive'],
        help='Used with --why to specify how to evaluate answers for query.  '
        + 'One of: gms, bfp, naive')

    op.add_option(
        '--firstAnswer',
        default=False,
        action='store_true',
        help=
        'Used with --why to determine whether to fetch all answers or just ' +
        'the first')

    op.add_option(
        '--edb',
        default=[],
        action='append',
        metavar='EXTENSIONAL_DB_PREDICATE_QNAME',
        help=
        'Used with --why/--strictness=defaultDerived to specify which clashing '
        + 'predicate will be designated as a base predicate')

    op.add_option(
        '--idb',
        default=[],
        action='append',
        metavar='INTENSIONAL_DB_PREDICATE_QNAME',
        help=
        'Used with --why/--strictness=defaultBase to specify which clashing ' +
        'predicate will be designated as a derived predicate')

    op.add_option(
        '--hybridPredicate',
        default=[],
        action='append',
        metavar='PREDICATE_QNAME',
        help=
        'Used with --why to explicitely specify a hybrid predicate (in both ' +
        ' IDB and EDB) ')

    op.add_option(
        '--noMagic',
        default=[],
        action='append',
        metavar='DB_PREDICATE_QNAME',
        help='Used with --why to specify that the predicate should not have its '
        + 'magic sets calculated')

    op.add_option(
        '--filter',
        action='append',
        default=[],
        metavar='PATH_OR_URI',
        help=
        'The Notation 3 documents to use as a filter (entailments do not participate in network)'
    )

    op.add_option(
        '--ruleFacts',
        action='store_true',
        default=False,
        help="Determines whether or not to attempt to parse initial facts from "
        + "the rule graph.  The default is %default")

    op.add_option(
        '--builtins',
        default=False,
        metavar='PATH_TO_PYTHON_MODULE',
        help="The path to a python module with function definitions (and a " +
        "dicitonary called ADDITIONAL_FILTERS) to use for builtins implementations"
    )

    op.add_option(
        '--dlp',
        action='store_true',
        default=False,
        help=
        'Use Description Logic Programming (DLP) to extract rules from OWL/RDF.  The default is %default'
    )

    op.add_option(
        '--sparqlEndpoint',
        action='store_true',
        default=False,
        help=
        'Indicates that the sole argument is the URI of a SPARQL endpoint to query'
    )

    op.add_option(
        '--ontology',
        action='append',
        default=[],
        metavar='PATH_OR_URI',
        help=
        'The path to an OWL RDF/XML graph to use DLP to extract rules from ' +
        '(otherwise, fact graph(s) are used)')

    op.add_option(
        '--ontologyFormat',
        default='xml',
        dest='ontologyFormat',
        metavar='RDF_FORMAT',
        choices=['xml', 'trix', 'n3', 'nt', 'rdfa'],
        help=
        "The format of the OWL RDF/XML graph specified via --ontology.  The default is %default"
    )

    op.add_option(
        '--builtinTemplates',
        default=None,
        metavar='N3_DOC_PATH_OR_URI',
        help=
        'The path to an N3 document associating SPARQL FILTER templates to ' +
        'rule builtins')

    op.add_option('--negation',
                  action='store_true',
                  default=False,
                  help='Extract negative rules?')

    op.add_option(
        '--normalForm',
        action='store_true',
        default=False,
        help='Whether or not to reduce DL axioms & LP rules to a normal form')
    (options, facts) = op.parse_args()

    nsBinds = {'iw': 'http://inferenceweb.stanford.edu/2004/07/iw.owl#'}
    for nsBind in options.ns:
        pref, nsUri = nsBind.split('=', 1)
        nsBinds[pref] = nsUri
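    # e.g. --ns=foaf=http://xmlns.com/foaf/0.1/ registers a 'foaf' prefix
    # binding (a hypothetical invocation; split('=', 1) keeps any '='
    # characters inside the URI intact).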

    namespace_manager = NamespaceManager(Graph())
    if options.sparqlEndpoint:
        factGraph = Graph(plugin.get('SPARQLStore', Store)(facts[0]))
        options.hybrid = False
    else:
        factGraph = Graph()
    ruleSet = Ruleset()

    for fileN in options.rules:
        if options.ruleFacts and not options.sparqlEndpoint:
            factGraph.parse(fileN, format='n3')
            print("Parsing RDF facts from ", fileN)
        if options.builtins:
            import imp
            userFuncs = imp.load_source('builtins', options.builtins)
            rs = HornFromN3(fileN,
                            additionalBuiltins=userFuncs.ADDITIONAL_FILTERS)
        else:
            rs = HornFromN3(fileN)
        nsBinds.update(rs.nsMapping)
        ruleSet.formulae.extend(rs)
        #ruleGraph.parse(fileN, format='n3')

    ruleSet.nsMapping = nsBinds

    for prefix, uri in list(nsBinds.items()):
        namespace_manager.bind(prefix, uri, override=False)
    closureDeltaGraph = Graph()
    closureDeltaGraph.namespace_manager = namespace_manager
    factGraph.namespace_manager = namespace_manager

    if not options.sparqlEndpoint:
        for fileN in facts:
            factGraph.parse(fileN, format=options.inputFormat)
            if options.imports:
                for owlImport in factGraph.objects(predicate=OWL_NS.imports):
                    factGraph.parse(owlImport)
                    print("Parsed Semantic Web Graph.. ", owlImport)

    if not options.sparqlEndpoint and facts:
        for pref, uri in factGraph.namespaces():
            nsBinds[pref] = uri

    if options.stdin:
        assert not options.sparqlEndpoint, "Cannot use --stdin with --sparqlEndpoint"
        factGraph.parse(sys.stdin, format=options.inputFormat)

    # Normalize namespace mappings:
    # prune redundant, rdflib-allocated namespace prefix mappings
    newNsMgr = NamespaceManager(factGraph)
    from FuXi.Rete.Util import CollapseDictionary
    for k, v in list(
            CollapseDictionary(
                dict([(k, v) for k, v in factGraph.namespaces()])).items()):
        newNsMgr.bind(k, v)
    factGraph.namespace_manager = newNsMgr

    if options.normalForm:
        NormalFormReduction(factGraph)

    if not options.sparqlEndpoint:
        workingMemory = generateTokenSet(factGraph)
    if options.builtins:
        import imp
        userFuncs = imp.load_source('builtins', options.builtins)
        rule_store, rule_graph, network = SetupRuleStore(
            makeNetwork=True, additionalBuiltins=userFuncs.ADDITIONAL_FILTERS)
    else:
        rule_store, rule_graph, network = SetupRuleStore(makeNetwork=True)
    network.inferredFacts = closureDeltaGraph
    network.nsMap = nsBinds

    if options.dlp:
        from FuXi.DLP.DLNormalization import NormalFormReduction
        if options.ontology:
            ontGraph = Graph()
            for fileN in options.ontology:
                ontGraph.parse(fileN, format=options.ontologyFormat)
                for prefix, uri in ontGraph.namespaces():
                    nsBinds[prefix] = uri
                    namespace_manager.bind(prefix, uri, override=False)
                    if options.sparqlEndpoint:
                        factGraph.store.bind(prefix, uri)
        else:
            ontGraph = factGraph
        NormalFormReduction(ontGraph)
        dlp = network.setupDescriptionLogicProgramming(
            ontGraph,
            addPDSemantics=options.pDSemantics,
            constructNetwork=False,
            ignoreNegativeStratus=options.negation,
            safety=safetyNameMap[options.safety])
        ruleSet.formulae.extend(dlp)
    if options.output == 'rif' and not options.why:
        for rule in ruleSet:
            print(rule)
        if options.negation:
            for nRule in network.negRules:
                print(nRule)

    elif options.output == 'man-owl':
        cGraph = network.closureGraph(factGraph, readOnly=False)
        cGraph.namespace_manager = namespace_manager
        Individual.factoryGraph = cGraph
        if options.classes:
            mapping = dict(namespace_manager.namespaces())
            for c in options.classes:
                pref, uri = c.split(':')
                print(Class(URIRef(mapping[pref] + uri)).__repr__(True))
        elif options.properties:
            mapping = dict(namespace_manager.namespaces())
            for p in options.properties:
                pref, uri = p.split(':')
                print(Property(URIRef(mapping[pref] + uri)))
        else:
            for p in AllProperties(cGraph):
                print(p.identifier, first(p.label))
                print(repr(p))
            for c in AllClasses(cGraph):
                if options.normalize:
                    if c.isPrimitive():
                        primAnc = [
                            sc for sc in c.subClassOf if sc.isPrimitive()
                        ]
                        if len(primAnc) > 1:
                            warnings.warn(
                                "Branches of primitive skeleton taxonomy" +
                                " should form trees: %s has %s primitive parents: %s"
                                % (c.qname, len(primAnc), primAnc),
                                UserWarning, 1)
                        children = [desc for desc in c.subSumpteeIds()]
                        for child in children:
                            for otherChild in [
                                    o for o in children if o is not child
                            ]:
                                if otherChild not in [
                                        c.identifier
                                        for c in Class(child).disjointWith
                                ]:
                                    warnings.warn(
                                        "Primitive children (of %s) " % c.qname +
                                        "must be mutually disjoint: %s and %s" %
                                        (Class(child).qname,
                                         Class(otherChild).qname),
                                        UserWarning, 1)
                # if not isinstance(c.identifier, BNode):
                print(c.__repr__(True))

    if not options.why:
        # Naive construction of graph
        for rule in ruleSet:
            network.buildNetworkFromClause(rule)

    magicSeeds = []
    if options.why:
        builtinTemplateGraph = Graph()
        if options.builtinTemplates:
            builtinTemplateGraph = Graph().parse(options.builtinTemplates,
                                                 format='n3')
        factGraph.templateMap = \
            dict([(pred, template)
                      for pred, _ignore, template in
                            builtinTemplateGraph.triples(
                                (None,
                                 TEMPLATES.filterTemplate,
                                 None))])
        goals = []
        query = ParseSPARQL(options.why)
        network.nsMap['pml'] = PML
        network.nsMap['gmp'] = GMP_NS
        network.nsMap['owl'] = OWL_NS
        nsBinds.update(network.nsMap)
        network.nsMap = nsBinds
        if not query.prologue:
            query.prologue = Prologue(None, [])
            query.prologue.prefixBindings.update(nsBinds)
        else:
            for prefix, nsInst in list(nsBinds.items()):
                if prefix not in query.prologue.prefixBindings:
                    query.prologue.prefixBindings[prefix] = nsInst
        print("query.prologue", query.prologue)
        print("query.query", query.query)
        print("query.query.whereClause", query.query.whereClause)
        print("query.query.whereClause.parsedGraphPattern",
              query.query.whereClause.parsedGraphPattern)
        goals.extend([(s, p, o) for s, p, o, c in ReduceGraphPattern(
            query.query.whereClause.parsedGraphPattern,
            query.prologue).patterns])
        # dPreds=[]# p for s, p, o in goals ]
        # print("goals", goals)
        magicRuleNo = 0
        bottomUpDerivedPreds = []
        # topDownDerivedPreds  = []
        defaultBasePreds = []
        defaultDerivedPreds = set()
        hybridPredicates = []
        mapping = dict(newNsMgr.namespaces())
        for edb in options.edb:
            pref, uri = edb.split(':')
            defaultBasePreds.append(URIRef(mapping[pref] + uri))
        noMagic = []
        for pred in options.noMagic:
            pref, uri = pred.split(':')
            noMagic.append(URIRef(mapping[pref] + uri))
        if options.ddlGraph:
            ddlGraph = Graph().parse(options.ddlGraph, format='n3')
            # @TODO: should also get hybrid predicates from DDL graph
            defaultDerivedPreds = IdentifyDerivedPredicates(
                ddlGraph, Graph(), ruleSet)
        else:
            for idb in options.idb:
                pref, uri = idb.split(':')
                defaultDerivedPreds.add(URIRef(mapping[pref] + uri))
            defaultDerivedPreds.update(
                set([p == RDF.type and o or p for s, p, o in goals]))
            for hybrid in options.hybridPredicate:
                pref, uri = hybrid.split(':')
                hybridPredicates.append(URIRef(mapping[pref] + uri))

        if options.method == 'gms':
            for goal in goals:
                goalSeed = AdornLiteral(goal).makeMagicPred()
                print("Magic seed fact (used in bottom-up evaluation)",
                      goalSeed)
                magicSeeds.append(goalSeed.toRDFTuple())
            if noMagic:
                print("Predicates whose magic sets will not be calculated")
                for p in noMagic:
                    print("\t", factGraph.qname(p))
            for rule in MagicSetTransformation(
                    factGraph,
                    ruleSet,
                    goals,
                    derivedPreds=bottomUpDerivedPreds,
                    strictCheck=nameMap[options.strictness],
                    defaultPredicates=(defaultBasePreds, defaultDerivedPreds),
                    noMagic=noMagic):
                magicRuleNo += 1
                network.buildNetworkFromClause(rule)
            if len(list(ruleSet)):
                print("reduction in size of program: %s (%s -> %s clauses)" %
                      (100 -
                       (float(magicRuleNo) / float(len(list(ruleSet)))) * 100,
                       len(list(ruleSet)), magicRuleNo))
            start = time.time()
            network.feedFactsToAdd(generateTokenSet(magicSeeds))
            if not [
                    rule for rule in factGraph.adornedProgram if len(rule.sip)
            ]:
                warnings.warn(
                    "Using GMS sideways information strategy with no " +
                    "information to pass from query.  Falling back to " +
                    "naive method over given facts and rules")
                network.feedFactsToAdd(workingMemory)
            sTime = time.time() - start
            if sTime > 1:
                sTimeStr = "%s seconds" % sTime
            else:
                sTime = sTime * 1000
                sTimeStr = "%s milli seconds" % sTime
            print("Time to calculate closure on working memory: ", sTimeStr)

            if options.output == 'rif':
                print("Rules used for bottom-up evaluation")
                if network.rules:
                    for clause in network.rules:
                        print(clause)
                else:
                    for clause in factGraph.adornedProgram:
                        print(clause)
            if options.output == 'conflict':
                network.reportConflictSet()

        elif options.method == 'bfp':
            topDownDPreds = defaultDerivedPreds
            if options.builtinTemplates:
                builtinTemplateGraph = Graph().parse(options.builtinTemplates,
                                                     format='n3')
                builtinDict = dict([
                    (pred, template) for pred, _ignore, template in
                    builtinTemplateGraph.triples((None,
                                                  TEMPLATES.filterTemplate,
                                                  None))
                ])
            else:
                builtinDict = None
            topDownStore = TopDownSPARQLEntailingStore(
                factGraph.store,
                factGraph,
                idb=ruleSet,
                DEBUG=options.debug,
                derivedPredicates=topDownDPreds,
                templateMap=builtinDict,
                nsBindings=network.nsMap,
                identifyHybridPredicates=options.hybrid
                if options.method == 'bfp' else False,
                hybridPredicates=hybridPredicates)
            targetGraph = Graph(topDownStore)
            for pref, nsUri in list(network.nsMap.items()):
                targetGraph.bind(pref, nsUri)
            start = time.time()
            # queryLiteral = EDBQuery([BuildUnitermFromTuple(goal) for goal in goals],
            #                         targetGraph)
            # query = queryLiteral.asSPARQL()
            # print("Goal to solve ", query)
            sTime = time.time() - start
            result = targetGraph.query(options.why, initNs=network.nsMap)
            if result.askAnswer:
                sTime = time.time() - start
                if sTime > 1:
                    sTimeStr = "%s seconds" % sTime
                else:
                    sTime = sTime * 1000
                    sTimeStr = "%s milli seconds" % sTime
                print("Time to reach answer ground goal answer of %s: %s" %
                      (result.askAnswer[0], sTimeStr))
            else:
                for rt in result:
                    sTime = time.time() - start
                    if sTime > 1:
                        sTimeStr = "%s seconds" % sTime
                    else:
                        sTime = sTime * 1000
                        sTimeStr = "%s milli seconds" % sTime
                    if options.firstAnswer:
                        break
                    print(
                        "Time to reach answer %s via top-down SPARQL sip strategy: %s"
                        % (rt, sTimeStr))
            if options.output == 'conflict' and options.method == 'bfp':
                for _network, _goal in topDownStore.queryNetworks:
                    print(_network, _goal)
                    _network.reportConflictSet(options.debug)
                for query in topDownStore.edbQueries:
                    print(query.asSPARQL())

    elif options.method == 'naive':
        start = time.time()
        network.feedFactsToAdd(workingMemory)
        sTime = time.time() - start
        if sTime > 1:
            sTimeStr = "%s seconds" % sTime
        else:
            sTime = sTime * 1000
            sTimeStr = "%s milli seconds" % sTime
        print("Time to calculate closure on working memory: ", sTimeStr)
        print(network)
        if options.output == 'conflict':
            network.reportConflictSet()

    for fileN in options.filter:
        for rule in HornFromN3(fileN):
            network.buildFilterNetworkFromClause(rule)

    if options.negation and network.negRules and options.method in [
            'both', 'bottomUp'
    ]:
        now = time.time()
        rt = network.calculateStratifiedModel(factGraph)
        print(
            "Time to calculate stratified, stable model (inferred %s facts): %s"
            % (rt, time.time() - now))
    if options.filter:
        print("Applying filter to entailed facts")
        network.inferredFacts = network.filteredFacts

    if options.closure and options.output in RDF_SERIALIZATION_FORMATS:
        cGraph = network.closureGraph(factGraph)
        cGraph.namespace_manager = namespace_manager
        print(
            cGraph.serialize(destination=None,
                             format=options.output,
                             base=None))
    elif options.output and options.output in RDF_SERIALIZATION_FORMATS:
        print(
            network.inferredFacts.serialize(destination=None,
                                            format=options.output,
                                            base=None))
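
# A hypothetical invocation (script name assumed for illustration; the flag
# names match the op.add_option declarations above):
#
#   python fuxi_main.py --rules=rules.n3 --output=n3 --closure facts.rdf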
Example #44
def addattributes():

    carConfig = getCarConfig()
    embeddedSignals = getConfigNames(carConfig)

    url = SERVER_URL

    #Binding of namespaces
    namespace_manager = NamespaceManager(Graph())
    namespace_manager.bind("rdfs", rdfs, override=False)
    #namespace_manager.bind("vss",vss,override=False)
    namespace_manager.bind("ssn", ssn, override=False)
    namespace_manager.bind("sosa", sosa, override=False)
    namespace_manager.bind("geo", geo, override=False)
    namespace_manager.bind("sf", sf, override=False)
    namespace_manager.bind("qudt-1-1", qudt11, override=False)
    namespace_manager.bind("qudt-unit-1-1", qudtunit11, override=False)
    namespace_manager.bind("dbr", dbr, override=False)
    namespace_manager.bind("vso", vso, override=False)
    namespace_manager.bind("step", step, override=False)
    namespace_manager.bind("time", otime, override=False)
    g.namespace_manager = namespace_manager

    #Creation of triples about the car
    g.add((MyCar, RDF.type, sosa.FeatureOfInterest))
    g.add((MyCar, RDF.type, vso.Automobile))
    g.add((MyCar, RDF.type, geo.Feature))

    #For every sensor provided as input, if it is known, it is attached to a sensor type (from DBpedia), a unit and an observable property.
    for signal in getVSS(embeddedSignals):
        sensorType = signal.sensor
        unit = signal.unit
        observableProperty = signal.uri.split('#')[-1]

        Sensor = BNode()
        ObservableProperty = signal.uri
        g.add((Sensor, RDF.type, vso.FeatureValue))
        g.add((Sensor, RDF.type, sensorType))
        g.add((MyCar, vso.feature, Sensor))
        g.add((Sensor, sosa.observes, ObservableProperty))
        g.add((ObservableProperty, rdfs.label, Literal(observableProperty)))
        g.add((ObservableProperty, RDF.type, sosa.ObservableProperty))
        g.add((ObservableProperty, qudt11.Unit, unit))

    #The graph is stored
    with open("outputFile.ttl", "w") as outfile:
        outfile.write(g.serialize(format='turtle'))
    #Return the graph
    #TODO: check the missing prefixes
    return g.serialize(format='turtle')
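
The function above leans on module-level globals (g, MyCar, and the namespace objects it binds) that are defined elsewhere in the project. A minimal sketch of that assumed setup, with placeholder URIs where the originals are not shown:

from rdflib import Graph, Namespace, URIRef

# Hypothetical module-level setup assumed by addattributes(); the real
# project defines these elsewhere, possibly with different URIs.
g = Graph()
rdfs = Namespace('http://www.w3.org/2000/01/rdf-schema#')
sosa = Namespace('http://www.w3.org/ns/sosa/')
vso = Namespace('http://purl.org/vso/ns#')
geo = Namespace('http://www.opengis.net/ont/geosparql#')
qudt11 = Namespace('http://qudt.org/1.1/schema/qudt#')
MyCar = URIRef('http://example.org/vehicle/MyCar')  # placeholder individual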
Example #45
def sparql_to_rgraph(query):
    """
    Transform SPARQL query into its R-graph representation.
    All BNodes are scoped within their BGPs and assigned a fresh and unique BNode label.
    All non-projection variables are assigned a fresh and unique BNode label on global scope.
    :param query: SPARQL algebra expression
    :return: rdflib.Graph
    """
    import networkx as nx
    import rdflib
    from rdflib import URIRef, BNode, Literal
    from rdflib.namespace import RDF

    def ground_projected_variables(r_graph):
        """
        Ground projected variables to avoid their removal during minimisation.
        :param r_graph:
        :return:
        """
        proj_vars = set(
            r_graph.objects(None,
                            rdflib.term.URIRef("http://www.dfki.de/voc#var")))
        for proj_var in proj_vars:
            r_graph.skolemize(new_graph=r_graph,
                              bnode=rdflib.term.BNode(proj_var))
            r_graph.remove((None, None, rdflib.term.BNode(proj_var)))
        return r_graph

    def sparql_iter(expr, ctx, ctx_graph):
        """
        Traverse SPARQL algebra expression and build rdflib.Graph
        :param expr: SPARQL algebra expression
        :param ctx: rdflib.Identifier as ctx node
        :param ctx_graph: rdflib.Graph accumulating the R-graph
        :return: rdflib.Graph for SPARQL query expression expr
        """

        # handles a triple pattern expression
        if isinstance(expr, tuple):
            # each triple pattern is a subgraph of its ctx node
            triple = BNode()
            ctx_graph.add((triple, RDF.type,
                           URIRef("http://www.dfki.de/voc#TriplePattern")))
            ctx_graph.add((ctx, URIRef("http://www.dfki.de/voc#arg"), triple))

            # the s/p/o positions are handled identically
            proj_vars = set(
                ctx_graph.objects(None, URIRef("http://www.dfki.de/voc#var")))
            for term, pos in zip(expr, ("s", "p", "o")):
                pred = URIRef("http://www.dfki.de/voc#" + pos)
                if isinstance(term, rdflib.term.URIRef):
                    ctx_graph.add((triple, pred, URIRef(term)))
                elif isinstance(term, rdflib.term.Literal):
                    ctx_graph.add((triple, pred, Literal(term)))
                elif isinstance(term, rdflib.term.BNode):
                    # blank nodes are scoped to the basic graph pattern
                    ctx_graph.add(
                        (triple, pred, BNode(str(ctx) + "__" + term)))
                elif isinstance(term, rdflib.term.Variable):
                    # Note that only variables projected out of the subquery will be visible
                    if BNode(term) in proj_vars:
                        ctx_graph.add((triple, pred, BNode(term)))
                    else:
                        ctx_graph.add(
                            (triple, pred, BNode(str(ctx) + "__" + term)))

            return ctx_graph

        # handles a Basic Graph Pattern as n-ary JOINs
        if expr.name == "BGP":
            # each BGP is a subgraph of its ctx node
            if (ctx, RDF.type,
                    URIRef("http://www.dfki.de/voc#Join")) in ctx_graph:
                # if ctx node is a JOIN, we add all triple patterns as arguments
                for arg in set(expr['triples']):
                    ctx_graph = ctx_graph + sparql_iter(arg, ctx, ctx_graph)
            else:
                # if ctx node is NOT a JOIN, we turn the BGP into a JOIN node
                join = BNode()
                ctx_graph.add(
                    (join, RDF.type, URIRef("http://www.dfki.de/voc#Join")))
                ctx_graph.add(
                    (ctx, URIRef("http://www.dfki.de/voc#arg"), join))
                for arg in set(expr['triples']):
                    ctx_graph = ctx_graph + sparql_iter(arg, join, ctx_graph)

            return ctx_graph

        # handles a JOIN node
        if expr.name == "Join":
            # make nested JOINs n-ary operators
            if (ctx, RDF.type,
                    URIRef("http://www.dfki.de/voc#Join")) in ctx_graph:
                ctx_graph = ctx_graph + sparql_iter(expr['p1'], ctx, ctx_graph)
                ctx_graph = ctx_graph + sparql_iter(expr['p2'], ctx, ctx_graph)
            else:
                join = BNode()
                ctx_graph.add(
                    (join, RDF.type, URIRef("http://www.dfki.de/voc#Join")))
                ctx_graph.add(
                    (ctx, URIRef("http://www.dfki.de/voc#arg"), join))
                ctx_graph = ctx_graph + sparql_iter(expr['p1'], join,
                                                    ctx_graph)
                ctx_graph = ctx_graph + sparql_iter(expr['p2'], join,
                                                    ctx_graph)

            return ctx_graph

        # handles a UNION node
        if expr.name == "Union":
            # make nested UNIONs n-ary operators
            if (ctx, RDF.type,
                    URIRef("http://www.dfki.de/voc#Union")) in ctx_graph:
                ctx_graph = ctx_graph + sparql_iter(expr['p1'], ctx, ctx_graph)
                ctx_graph = ctx_graph + sparql_iter(expr['p2'], ctx, ctx_graph)
            else:
                union = BNode()
                ctx_graph.add(
                    (union, RDF.type, URIRef("http://www.dfki.de/voc#Union")))
                ctx_graph.add(
                    (ctx, URIRef("http://www.dfki.de/voc#arg"), union))
                ctx_graph = ctx_graph + sparql_iter(expr['p1'], union,
                                                    ctx_graph)
                ctx_graph = ctx_graph + sparql_iter(expr['p2'], union,
                                                    ctx_graph)

            return ctx_graph

        # handles a PROJECT node
        if expr.name == "Project":
            # since we assume UNION normal form, we always add a UNION below the SELECT node
            union = BNode()
            ctx_graph.add(
                (union, RDF.type, URIRef("http://www.dfki.de/voc#Union")))
            ctx_graph.add((ctx, URIRef("http://www.dfki.de/voc#arg"), union))
            ctx_graph = ctx_graph + sparql_iter(expr['p'], union, ctx_graph)

            return ctx_graph

    # handles a SELECT node and transforms the query into an R-graph
    if query.algebra.name == "SelectQuery":
        # every SELECT node is a query graph
        from rdflib.namespace import Namespace, NamespaceManager
        dfkiNs = Namespace("http://www.dfki.de/voc#")
        namespace_manager = NamespaceManager(rdflib.Graph())
        namespace_manager.bind("dfki", dfkiNs, override=False)

        r_graph = rdflib.Graph()
        r_graph.namespace_manager = namespace_manager
        select = BNode()  # a GUID is generated
        r_graph.add(
            (select, RDF.type, URIRef("http://www.dfki.de/voc#Select")))
        for proj_var in set(query.algebra.PV):
            r_graph.add((select, URIRef("http://www.dfki.de/voc#var"),
                         BNode(proj_var)))
        r_graph = r_graph + sparql_iter(query.algebra['p'], select, r_graph)

        return ground_projected_variables(r_graph)
    else:
        # only SELECT queries are supported
        return None
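
A hedged usage sketch: parse a query with rdflib's SPARQL machinery and hand the resulting object (which carries the .algebra attribute the function inspects) to sparql_to_rgraph. The query string is purely illustrative:

from rdflib.plugins.sparql import prepareQuery

q = prepareQuery("""
    SELECT ?s WHERE {
        { ?s a <http://example.org/A> } UNION { ?s a <http://example.org/B> }
    }
""")
r_graph = sparql_to_rgraph(q)  # q.algebra.name == 'SelectQuery'
print(r_graph.serialize(format='turtle'))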
Example #46
def main():
    from optparse import OptionParser
    op = OptionParser(
      'usage: %prog [options] factFile1 factFile2 ... factFileN')
    op.add_option('--why',
                  default=None,
      help='Specifies the goals to solve for using the non-naive methods; ' +
              'see --method')
    op.add_option('--closure',
                  action='store_true',
                  default=False,
      help='Whether or not to serialize the inferred triples' +
             ' along with the original triples.  Otherwise ' +
              '(the default behavior), serialize only the inferred triples')
    op.add_option('--imports',
                action='store_true',
                default=False,
    help='Whether or not to follow owl:imports in the fact graph')
    op.add_option('--output',
                  default='n3',
                  metavar='RDF_FORMAT',
                  choices=['xml',
                             'TriX',
                             'n3',
                             'pml',
                             'proof-graph',
                             'nt',
                             'rif',
                             'rif-xml',
                             'conflict',
                             'man-owl'],
      help="Serialize the inferred triples and/or original RDF triples to STDOUT "+
             "using the specified RDF syntax ('xml','pretty-xml','nt','turtle', "+
             "or 'n3') or to print a summary of the conflict set (from the RETE "+
             "network) if the value of this option is 'conflict'.  If the the "+
             " value is 'rif' or 'rif-xml', Then the rules used for inference "+
             "will be serialized as RIF.  If the value is 'pml' and --why is used, "+
             " then the PML RDF statements are serialized.  If output is "+
             "'proof-graph then a graphviz .dot file of the proof graph is printed. "+
             "Finally if the value is 'man-owl', then the RDF facts are assumed "+
             "to be OWL/RDF and serialized via Manchester OWL syntax. The default is %default")
    op.add_option('--class',
                  dest='classes',
                  action='append',
                  default=[],
                  metavar='QNAME',
      help='Used with --output=man-owl to determine which '+
             'classes within the entire OWL/RDF are targeted for serialization'+
             '.  Can be used more than once')
    op.add_option('--hybrid',
                  action='store_true',
                  default=False,
      help='Used with --method=bfp to determine whether or not to '+
             'peek into the fact graph to identify predicates that are both '+
             'derived and base.  This is expensive for large fact graphs '+
             'and is explicitly not used against SPARQL endpoints')
    op.add_option('--property',
                  action='append',
                  dest='properties',
                  default=[],
                  metavar='QNAME',
      help='Used with --output=man-owl or --extract to determine which '+
             'properties are serialized / extracted.  Can be used more than once')
    op.add_option('--normalize',
                  action='store_true',
                  default=False,
      help="Used with --output=man-owl to attempt to determine if the ontology is 'normalized' [Rector, A. 2003]"+
      "The default is %default")
    op.add_option('--ddlGraph',
                default=False,
      help="The location of a N3 Data Description document describing the IDB predicates")
    op.add_option('--input-format',
                  default='xml',
                  dest='inputFormat',
                  metavar='RDF_FORMAT',
                  choices=['xml', 'trix', 'n3', 'nt', 'rdfa'],
      help="The format of the RDF document(s) which serve as the initial facts "+
             " for the RETE network. One of 'xml','n3','trix', 'nt', "+
             "or 'rdfa'.  The default is %default")
    op.add_option('--safety',
                  default='none',
                  metavar='RULE_SAFETY',
                  choices=['loose', 'strict', 'none'],
      help="Determines how to handle RIF Core safety.  A value of 'loose' "+
             " means that unsafe rules will be ignored.  A value of 'strict' "+
             " will cause a syntax exception upon any unsafe rule.  A value of "+
             "'none' (the default) does nothing")
    op.add_option('--pDSemantics',
                  action='store_true',
                  default=False,
      help='Used with --dlp to add the pD semantics ruleset for semantics not covered '+
      'by DLP but expressible in definite Datalog Logic Programming.'+
      ' The default is %default')
    op.add_option('--stdin',
                  action='store_true',
                  default=False,
      help='Parse STDIN as an RDF graph to contribute to the initial facts. The default is %default ')
    op.add_option('--ns',
                  action='append',
                  default=[],
                  metavar="PREFIX=URI",
      help='Register a namespace binding (QName prefix to a base URI).  This '+
             'can be used more than once')
    op.add_option('--rules',
                  default=[],
                  action='append',
                  metavar='PATH_OR_URI',
      help='The Notation 3 documents to use as rulesets for the RETE network'+
      '.  Can be specified more than once')
    op.add_option('-d', '--debug', action='store_true', default=False,
      help='Include debugging output')
    op.add_option('--strictness',
                  default='defaultBase',
                  metavar='DDL_STRICTNESS',
                  choices=['loose',
                             'defaultBase',
                             'defaultDerived',
                             'harsh'],
      help='Used with --why to specify whether to *not* check if predicates are '+
      'both derived and base (loose); if they are, mark them as derived (defaultDerived) '+
      'or as base (defaultBase) predicates, or else raise an exception (harsh)')
    op.add_option('--method',
                  default='naive',
                  metavar='reasoning algorithm',
                  choices=['gms', 'bfp', 'naive'],
      help='Used with --why to specify how to evaluate answers for the query.  '+
      'One of: gms, bfp, naive')
    op.add_option('--firstAnswer',
                  default=False,
                  action='store_true',
      help='Used with --why to determine whether to fetch all answers or just '+
      'the first')
    op.add_option('--edb',
                  default=[],
                  action='append',
                  metavar='EXTENSIONAL_DB_PREDICATE_QNAME',
      help='Used with --why/--strictness=defaultDerived to specify which clashing '+
      'predicate will be designated as a base predicate')
    op.add_option('--idb',
                  default=[],
                  action='append',
                  metavar='INTENSIONAL_DB_PREDICATE_QNAME',
      help='Used with --why/--strictness=defaultBase to specify which clashing '+
      'predicate will be designated as a derived predicate')
    op.add_option('--hybridPredicate',
                default=[],
                action='append',
                metavar='PREDICATE_QNAME',
    help='Used with --why to explicitly specify a hybrid predicate (in both '+
           'IDB and EDB)')

    op.add_option('--noMagic',
                  default=[],
                  action='append',
                  metavar='DB_PREDICATE_QNAME',
      help='Used with --why to specify that the predicate should not have its '+
      'magic sets calculated')
    op.add_option('--filter',
                  action='append',
                  default=[],
                  metavar='PATH_OR_URI',
      help='The Notation 3 documents to use as a filter (entailments do not participate in the network)')
    op.add_option('--ruleFacts',
                  action='store_true',
                  default=False,
      help="Determines whether or not to attempt to parse initial facts from "+
      "the rule graph.  The default is %default")
    op.add_option('--builtins',
                  default=False,
                  metavar='PATH_TO_PYTHON_MODULE',
      help="The path to a python module with function definitions (and a "+
      "dicitonary called ADDITIONAL_FILTERS) to use for builtins implementations")
    op.add_option('--dlp',
                  action='store_true',
                  default=False,
      help='Use Description Logic Programming (DLP) to extract rules from OWL/RDF.  The default is %default')
    op.add_option('--sparqlEndpoint',
                action='store_true',
                default=False,
    help='Indicates that the sole argument is the URI of a SPARQL endpoint to query')

    op.add_option('--ontology',
                  action='append',
                  default=[],
                  metavar='PATH_OR_URI',
      help='The path to an OWL RDF/XML graph to use DLP to extract rules from '+
      '(otherwise, fact graph(s) are used)')

    op.add_option('--ruleFormat',
        default='n3',
        dest='ruleFormat',
        metavar='RULE_FORMAT',
        choices=['n3', 'rif'],
        help="The format of the rules to parse ('n3', 'rif').  The default is %default")

    op.add_option('--ontologyFormat',
                default='xml',
                dest='ontologyFormat',
                metavar='RDF_FORMAT',
                choices=['xml', 'trix', 'n3', 'nt', 'rdfa'],
    help="The format of the OWL RDF/XML graph specified via --ontology.  The default is %default")

    op.add_option('--builtinTemplates',
                  default=None,
                  metavar='N3_DOC_PATH_OR_URI',
      help='The path to an N3 document associating SPARQL FILTER templates to '+
      'rule builtins')
    op.add_option('--negation',
                  action='store_true',
                  default=False,
      help='Extract negative rules?')
    op.add_option('--normalForm',
                  action='store_true',
                  default=False,
      help='Whether or not to reduce DL axioms & LP rules to a normal form')
    (options, facts) = op.parse_args()

    nsBinds = {'iw': 'http://inferenceweb.stanford.edu/2004/07/iw.owl#'}
    for nsBind in options.ns:
        pref, nsUri = nsBind.split('=')
        nsBinds[pref] = nsUri

    namespace_manager = NamespaceManager(Graph())
    if options.sparqlEndpoint:
        factGraph = Graph(plugin.get('SPARQL', Store)(facts[0]))
        options.hybrid = False
    else:
        factGraph = Graph()
    ruleSet = Ruleset()

    for fileN in options.rules:
        if options.ruleFacts and not options.sparqlEndpoint:
            factGraph.parse(fileN, format='n3')
            print("Parsing RDF facts from %s" % fileN)
        if options.builtins:
            import imp
            userFuncs = imp.load_source('builtins', options.builtins)
            rs = HornFromN3(fileN,
                            additionalBuiltins=userFuncs.ADDITIONAL_FILTERS)
            nsBinds.update(rs.nsMapping)
        elif options.ruleFormat == 'rif':
            try:
                from FuXi.Horn.RIFCore import RIFCoreParser
                rif_parser = RIFCoreParser(location=fileN, debug=options.debug)
                rs = rif_parser.getRuleset()
            except ImportError:
                raise Exception(
                    "Missing 3rd party libraries for RIF processing"
                )
        else:
            rs = HornFromN3(fileN)
        nsBinds.update(rs.nsMapping)
        ruleSet.formulae.extend(rs)
        #ruleGraph.parse(fileN,format='n3')

    ruleSet.nsMapping = nsBinds

    for prefix, uri in list(nsBinds.items()):
        namespace_manager.bind(prefix, uri, override=False)
    closureDeltaGraph = Graph()
    closureDeltaGraph.namespace_manager = namespace_manager
    factGraph.namespace_manager = namespace_manager

    if not options.sparqlEndpoint:
        for fileN in facts:
            factGraph.parse(fileN, format=options.inputFormat)
            if options.imports:
                for owlImport in factGraph.objects(predicate=OWL_NS.imports):
                    factGraph.parse(owlImport)
                    print("Parsed Semantic Web Graph.. %s" % owlImport)

    if not options.sparqlEndpoint and facts:
        for pref, uri in factGraph.namespaces():
            nsBinds[pref] = uri

    if options.stdin:
        assert not options.sparqlEndpoint, "Cannot use --stdin with --sparqlEndpoint"
        factGraph.parse(sys.stdin, format=options.inputFormat)

    #Normalize namespace mappings
    #prune redundant, rdflib-allocated namespace prefix mappings
    newNsMgr = NamespaceManager(factGraph)
    from FuXi.Rete.Util import CollapseDictionary
    for k, v in list(CollapseDictionary(dict([(k, v)
                                    for k, v in factGraph.namespaces()])).items()):
        newNsMgr.bind(k, v)
    factGraph.namespace_manager = newNsMgr

    if options.normalForm:
        NormalFormReduction(factGraph)

    if not options.sparqlEndpoint:
        workingMemory = generateTokenSet(factGraph)
    if options.builtins:
        import imp
        userFuncs = imp.load_source('builtins', options.builtins)
        rule_store, rule_graph, network = SetupRuleStore(
                             makeNetwork=True,
                             additionalBuiltins=userFuncs.ADDITIONAL_FILTERS)
    else:
        rule_store, rule_graph, network = SetupRuleStore(makeNetwork=True)
    network.inferredFacts = closureDeltaGraph
    network.nsMap = nsBinds

    if options.dlp:
        from FuXi.DLP.DLNormalization import NormalFormReduction
        if options.ontology:
            ontGraph = Graph()
            for fileN in options.ontology:
                ontGraph.parse(fileN, format=options.ontologyFormat)
                for prefix, uri in ontGraph.namespaces():
                    nsBinds[prefix] = uri
                    namespace_manager.bind(prefix, uri, override=False)
                    if options.sparqlEndpoint:
                        factGraph.store.bind(prefix, uri)
        else:
            ontGraph = factGraph
        NormalFormReduction(ontGraph)
        dlp = network.setupDescriptionLogicProgramming(
                                 ontGraph,
                                 addPDSemantics=options.pDSemantics,
                                 constructNetwork=False,
                                 ignoreNegativeStratus=options.negation,
                                 safety=safetyNameMap[options.safety])
        ruleSet.formulae.extend(dlp)
    if options.output == 'rif' and not options.why:
        for rule in ruleSet:
            print(rule)
        if options.negation:
            for nRule in network.negRules:
                print(nRule)

    elif options.output == 'man-owl':
        cGraph = network.closureGraph(factGraph, readOnly=False)
        cGraph.namespace_manager = namespace_manager
        Individual.factoryGraph = cGraph
        if options.classes:
            mapping = dict(namespace_manager.namespaces())
            for c in options.classes:
                pref, uri = c.split(':')
                print(Class(URIRef(mapping[pref] + uri)).__repr__(True))
        elif options.properties:
            mapping = dict(namespace_manager.namespaces())
            for p in options.properties:
                pref, uri = p.split(':')
                print(Property(URIRef(mapping[pref] + uri)))
        else:
            for p in AllProperties(cGraph):
                print(p.identifier, first(p.label))
                print(repr(p))
            for c in AllClasses(cGraph):
                if options.normalize:
                    if c.isPrimitive():
                        primAnc = [sc for sc in c.subClassOf if sc.isPrimitive()]
                        if len(primAnc) > 1:
                            warnings.warn("Branches of primitive skeleton taxonomy" +
                              " should form trees: %s has %s primitive parents: %s" % (
                             c.qname, len(primAnc), primAnc), UserWarning, 1)
                        children = [desc for desc in c.subSumpteeIds()]
                        for child in children:
                            for otherChild in [o for o in children if o is not child]:
                                if not otherChild in [c.identifier
                                          for c in Class(child).disjointWith]:  # and\
                                    warnings.warn("Primitive children (of %s) " % (c.qname) +
                                          "must be mutually disjoint: %s and %s" % (
                                      Class(child).qname,
                                      Class(otherChild).qname), UserWarning, 1)
                # if not isinstance(c.identifier,BNode):
                print(c.__repr__(True))

    if not options.why:
        #Naive construction of graph
        for rule in ruleSet:
            network.buildNetworkFromClause(rule)

    magicSeeds=[]
    if options.why:
        builtinTemplateGraph = Graph()
        if options.builtinTemplates:
            builtinTemplateGraph = Graph().parse(options.builtinTemplates,
                                                format='n3')
        factGraph.templateMap = \
            dict([(pred, template)
                      for pred, _ignore, template in
                            builtinTemplateGraph.triples(
                                (None,
                                 TEMPLATES.filterTemplate,
                                 None))])
        goals = []
        query = ParseSPARQL(options.why)
        network.nsMap['pml'] = PML
        network.nsMap['gmp'] = GMP_NS
        network.nsMap['owl'] = OWL_NS
        nsBinds.update(network.nsMap)
        network.nsMap = nsBinds
        if not query.prolog:
            query.prolog = Prolog(None, [])
            query.prolog.prefixBindings.update(nsBinds)
        else:
            for prefix, nsInst in list(nsBinds.items()):
                if prefix not in query.prolog.prefixBindings:
                    query.prolog.prefixBindings[prefix] = nsInst
        goals.extend([(s, p, o) for s, p, o, c in ReduceGraphPattern(
                                    query.query.whereClause.parsedGraphPattern,
                                    query.prolog).patterns])
        # dPreds=[]# p for s,p,o in goals ]
        magicRuleNo = 0
        bottomUpDerivedPreds = []
        # topDownDerivedPreds  = []
        defaultBasePreds = []
        defaultDerivedPreds = set()
        hybridPredicates = []
        mapping = dict(newNsMgr.namespaces())
        for edb in options.edb:
            pref, uri = edb.split(':')
            defaultBasePreds.append(URIRef(mapping[pref] + uri))
        noMagic = []
        for pred in options.noMagic:
            pref, uri = pred.split(':')
            noMagic.append(URIRef(mapping[pref] + uri))
        if options.ddlGraph:
            ddlGraph = Graph().parse(options.ddlGraph, format='n3')
            # @TODO: should also get hybrid predicates from DDL graph
            defaultDerivedPreds = IdentifyDerivedPredicates(
                                    ddlGraph,
                                    Graph(),
                                    ruleSet)
        else:
            for idb in options.idb:
                pref, uri = idb.split(':')
                defaultDerivedPreds.add(URIRef(mapping[pref] + uri))
            defaultDerivedPreds.update(
                set([o if p == RDF.type else p for s, p, o in goals]))
            for hybrid in options.hybridPredicate:
                pref, uri = hybrid.split(':')
                hybridPredicates.append(URIRef(mapping[pref]+uri))

        if options.method == 'gms':
            for goal in goals:
                goalSeed = AdornLiteral(goal).makeMagicPred()
                print("Magic seed fact (used in bottom-up evaluation) %s" % goalSeed)
                magicSeeds.append(goalSeed.toRDFTuple())
            if noMagic:
                print("Predicates whose magic sets will not be calculated")
                for p in noMagic:
                    print("\t%s" % factGraph.qname(p))
            for rule in MagicSetTransformation(
                                       factGraph,
                                       ruleSet,
                                       goals,
                                       derivedPreds=bottomUpDerivedPreds,
                                       strictCheck=nameMap[options.strictness],
                                       defaultPredicates=(defaultBasePreds,
                                                          defaultDerivedPreds),
                                       noMagic=noMagic):
                magicRuleNo += 1
                network.buildNetworkFromClause(rule)
            if len(list(ruleSet)):
                print("reduction in size of program: %s (%s -> %s clauses)" % (
                                           100 - (float(magicRuleNo) /
                                                  float(len(list(ruleSet)))
                                                  ) * 100,
                                           len(list(ruleSet)),
                                           magicRuleNo))
            start = time.time()
            network.feedFactsToAdd(generateTokenSet(magicSeeds))
            if not [
                rule for rule in factGraph.adornedProgram if len(rule.sip)]:
                warnings.warn(
                    "Using GMS sideways information strategy with no "+
                      "information to pass from query.  Falling back to "+
                      "naive method over given facts and rules")
                network.feedFactsToAdd(workingMemory)
            sTime = time.time() - start
            if sTime > 1:
                sTimeStr = "%s seconds" % sTime
            else:
                sTime = sTime * 1000
                sTimeStr = "%s milliseconds" % sTime
            print("Time to calculate closure on working memory: %s" % sTimeStr)

            if options.output == 'rif':
                print("Rules used for bottom-up evaluation")
                if network.rules:
                    for clause in network.rules:
                        print(clause)
                else:
                    for clause in factGraph.adornedProgram:
                        print(clause)
            if options.output == 'conflict':
                network.reportConflictSet()

        elif options.method == 'bfp':
            topDownDPreds = defaultDerivedPreds
            if options.builtinTemplates:
                builtinTemplateGraph = Graph().parse(options.builtinTemplates,
                                                    format='n3')
                builtinDict = dict([(pred, template)
                              for pred, _ignore, template in
                                    builtinTemplateGraph.triples(
                                        (None,
                                         TEMPLATES.filterTemplate,
                                         None))])
            else:
                builtinDict = None
            topDownStore = TopDownSPARQLEntailingStore(
                            factGraph.store,
                            factGraph,
                            idb=ruleSet,
                            DEBUG=options.debug,
                            derivedPredicates=topDownDPreds,
                            templateMap=builtinDict,
                            nsBindings=network.nsMap,
                            identifyHybridPredicates=options.hybrid \
                                    if options.method == 'bfp' else False,
                            hybridPredicates=hybridPredicates)
            targetGraph = Graph(topDownStore)
            for pref, nsUri in list(network.nsMap.items()):
                targetGraph.bind(pref, nsUri)
            start = time.time()
            # queryLiteral = EDBQuery([BuildUnitermFromTuple(goal)
            #                                   for goal in goals],
            #                         targetGraph)
            # query = queryLiteral.asSPARQL()
            # print >>sys.stderr, "Goal to solve ", query
            sTime = time.time() - start
            result = targetGraph.query(options.why, initNs=network.nsMap)
            if result.askAnswer:
                sTime = time.time() - start
                if sTime > 1:
                    sTimeStr = "%s seconds" % sTime
                else:
                    sTime = sTime * 1000
                    sTimeStr = "%s milliseconds" % sTime
                print("Time to reach ground goal answer of %s: %s" % (
                      result.askAnswer[0], sTimeStr))
            else:
                for rt in result:
                    sTime = time.time() - start
                    if sTime > 1:
                        sTimeStr = "%s seconds" % sTime
                    else:
                        sTime = sTime * 1000
                        sTimeStr = "%s milli seconds" % sTime
                    if options.firstAnswer:
                        break
                    print(
                    "Time to reach answer %s via top-down SPARQL sip strategy: %s" % (
                    rt, sTimeStr))
            if options.output == 'conflict' and options.method == 'bfp':
                for _network, _goal in topDownStore.queryNetworks:
                    print(_network, _goal)
                    _network.reportConflictSet(options.debug)
                for query in topDownStore.edbQueries:
                    print(query.asSPARQL())

    elif options.method == 'naive':
        start = time.time()
        network.feedFactsToAdd(workingMemory)
        sTime = time.time() - start
        if sTime > 1:
            sTimeStr = "%s seconds" % sTime
        else:
            sTime = sTime * 1000
            sTimeStr = "%s milliseconds" % sTime
        print("Time to calculate closure on working memory: %s" % sTimeStr)
        print(network)
        if options.output == 'conflict':
            network.reportConflictSet()

    for fileN in options.filter:
        for rule in HornFromN3(fileN):
            network.buildFilterNetworkFromClause(rule)

    if options.negation and network.negRules and options.method in ['both',
                                                                    'bottomUp']:
        now = time.time()
        rt = network.calculateStratifiedModel(factGraph)
        print("Time to calculate stratified, stable model (inferred %s facts): %s" % (
            rt, time.time() - now))
    if options.filter:
        print("Applying filter to entailed facts")
        network.inferredFacts = network.filteredFacts

    if options.closure \
        and options.output in RDF_SERIALIZATION_FORMATS:
        cGraph = network.closureGraph(factGraph)
        cGraph.namespace_manager = namespace_manager
        print(cGraph.serialize(destination=None,
                               format=options.output,
                               base=None))
    elif options.output and options.output in RDF_SERIALIZATION_FORMATS:
        print(network.inferredFacts.serialize(destination=None,
                                              format=options.output,
                                              base=None))
Example #47
File: rdf.py Project: rfResearch/udata
# Extra Namespaces
ADMS = Namespace('http://www.w3.org/ns/adms#')
DCAT = Namespace('http://www.w3.org/ns/dcat#')
HYDRA = Namespace('http://www.w3.org/ns/hydra/core#')
SCHEMA = Namespace('http://schema.org/')
SCV = Namespace('http://purl.org/NET/scovo#')
SPDX = Namespace('http://spdx.org/rdf/terms#')
VCARD = Namespace('http://www.w3.org/2006/vcard/ns#')
FREQ = Namespace('http://purl.org/cld/freq/')
EUFREQ = Namespace(
    'http://publications.europa.eu/resource/authority/frequency/'
)  # noqa: E501
DCT = DCTERMS  # More common usage

namespace_manager = NamespaceManager(Graph())
namespace_manager.bind('dcat', DCAT)
namespace_manager.bind('dct', DCT)
namespace_manager.bind('foaf', FOAF)
namespace_manager.bind('hydra', HYDRA)
namespace_manager.bind('rdfs', RDFS)
namespace_manager.bind('scv', SCV)
namespace_manager.bind('skos', SKOS)
namespace_manager.bind('vcard', VCARD)
namespace_manager.bind('xsd', XSD)
namespace_manager.bind('freq', FREQ)

# Support JSON-LD in format detection
FORMAT_MAP = SUFFIX_FORMAT_MAP.copy()
FORMAT_MAP['json'] = 'json-ld'
FORMAT_MAP['jsonld'] = 'json-ld'
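
Because FORMAT_MAP extends rdflib's suffix table, it can be passed directly to rdflib.util.guess_format. A small sketch (the file names are illustrative):

from rdflib.util import guess_format

assert guess_format('catalog.jsonld', FORMAT_MAP) == 'json-ld'
assert guess_format('catalog.ttl', FORMAT_MAP) == 'turtle'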
Example #48
#===============================================================================

import os

#===============================================================================

from rdflib import Graph
from rdflib.namespace import Namespace, NamespaceManager

import yaml

#===============================================================================

with open(os.path.join(os.path.split(__file__)[0], 'curie_map.yaml')) as f:
    curie_map = yaml.load(f, Loader=yaml.Loader)

SCICRUNCH_NS = NamespaceManager(Graph())
_namespaces = {}

for prefix, url in curie_map.items():
    ns = Namespace(url)
    SCICRUNCH_NS.bind(prefix, ns, override=True)
    _namespaces[prefix] = ns

#===============================================================================

def namespaces_dict():
    return _namespaces

#===============================================================================
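
A hedged usage sketch; the 'UBERON' prefix is only an example, since the actual prefixes depend on the contents of curie_map.yaml:

prefixes = namespaces_dict()
if 'UBERON' in prefixes:  # hypothetical prefix
    # Namespace supports item access, yielding the full URI for the
    # CURIE UBERON:0002101 (an assumed identifier).
    print(prefixes['UBERON']['0002101'])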
Example #49
core_namespaces = {
    'dc' : rdflib.namespace.DC,
    'dcterms' : rdflib.namespace.DCTERMS,
    'ebucore' : Namespace(
        'http://www.ebu.ch/metadata/ontologies/ebucore/ebucore#'),
    'fcrepo' : Namespace('http://fedora.info/definitions/fcrepo#'),
    'fcadmin' : Namespace('info:fcsystem/graph/admin'),
    'fcres' : Namespace('info:fcres'),
    'fcmain' : Namespace('info:fcsystem/graph/userdata/_main'),
    'fcstruct' : Namespace('info:fcsystem/graph/structure'),
    'fcsystem' : Namespace('info:fcsystem/'),
    'foaf': Namespace('http://xmlns.com/foaf/0.1/'),
    'iana' : Namespace('http://www.iana.org/assignments/relation/'),
    'ldp' : Namespace('http://www.w3.org/ns/ldp#'),
    'pcdm': Namespace('http://pcdm.org/models#'),
    'premis' : Namespace('http://www.loc.gov/premis/rdf/v1#'),
    'rdf' : rdflib.namespace.RDF,
    'rdfs' : rdflib.namespace.RDFS,
    'webac' : Namespace('http://www.w3.org/ns/auth/acl#'),
    'xsd' : rdflib.namespace.XSD,
}

ns_collection = {pfx: Namespace(ns) for pfx, ns in config['namespaces'].items()}
ns_collection.update(core_namespaces)

ns_mgr = NamespaceManager(Graph())

# Bind every prefix in the collection to the namespace manager.
for ns, uri in ns_collection.items():
    ns_mgr.bind(ns, uri, override=False)
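
With every prefix bound, ns_mgr can compact full URIs into CURIEs; a small sketch using one of the core namespaces bound above:

from rdflib import URIRef

# 'ldp' was bound above, so the manager can shorten LDP terms.
print(ns_mgr.qname(URIRef('http://www.w3.org/ns/ldp#contains')))  # ldp:contains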
Example #50
import sys
from rdflib import Graph
from urllib import quote
from rfc3987 import parse  # URI/IRI validation
from resources import FRAME_IT_TO_EN
from resources import FRAME_DBPO_MAP
from rdflib.namespace import Namespace, NamespaceManager


# Namespace prefixes for RDF serialization
RESOURCE_NS = Namespace('http://it.dbpedia.org/resource/')
FACT_EXTRACTION_NS = Namespace('http://dbpedia.org/fact-extraction/')
ONTOLOGY_NS = Namespace('http://dbpedia.org/ontology/')
NAMESPACE_MANAGER = NamespaceManager(Graph())
NAMESPACE_MANAGER.bind('resource', RESOURCE_NS)
NAMESPACE_MANAGER.bind('fact', FACT_EXTRACTION_NS)
NAMESPACE_MANAGER.bind('ontology', ONTOLOGY_NS)

NAMESPACES = {
    'ontology': ONTOLOGY_NS,
    'resource': RESOURCE_NS,
    'fact_extraction': FACT_EXTRACTION_NS,
}

def to_assertions(labeled_results, id_to_title, outfile='dataset.nt',
                  score_dataset=None, format='nt'):
    """
    Serialize the labeled results into RDF NTriples

    :param list labeled_results: Data for each sentence. Schema:
Example #51
File: rdf.py Project: timlehr/vgmdb

turtle.TurtleSerializer.startDocument = turtle_patched_startDocument

# Namespaces in use
from rdflib.namespace import RDF, RDFS, XSD, DCTERMS, FOAF
BIO = Namespace("http://purl.org/vocab/bio/0.1/")
SCHEMA = Namespace("http://schema.org/")
MO = Namespace("http://purl.org/ontology/mo/")
EVENT = Namespace("http://purl.org/NET/c4dm/event.owl#")
TL = Namespace("http://purl.org/NET/c4dm/timeline.owl#")
ns = NamespaceManager(Graph())
namespaces = [(n.lower(), globals()[n]) for n in dict(globals())
              if isinstance(globals()[n], Namespace)]
for name, namespace in namespaces:
    ns.bind(name, namespace)


def generate(config, type, data):
    global base
    base = config.BASE_URL
    func_name = 'generate_%s' % type
    if func_name in globals():
        graph = globals()['generate_%s' % type](config, data)
    else:
        doc = BNode()
        uri = base
        graph = Graph('IOMemory', doc)
    graph.namespace_manager = ns
    return graph
Example #52
File: rdfpipe.py Project: carnotip/FuXi
def main():
  from optparse import OptionParser

  parser = OptionParser()
  parser.add_option('--stdin', type="choice",
    choices = ['xml', 'trix', 'n3', 'nt', 'rdfa'],
    help = 'Parse RDF from STDIN (useful for piping) with given format')
  parser.add_option('-x', '--xml', action='append',
    help = 'Append to the list of RDF/XML documents to parse')
  parser.add_option('-t', '--trix', action='append',
    help = 'Append to the list of TriX documents to parse')
  parser.add_option('-n', '--n3', action='append',
    help = 'Append to the list of N3 documents to parse')
  parser.add_option('--nt', action='append',
    help = 'Append to the list of NT documents to parse')
  parser.add_option('-a', '--rdfa', action='append',
    help = 'Append to the list of RDFa documents to parse')

  parser.add_option('-o', '--output', type="choice",
    choices = ['n3', 'xml', 'pretty-xml', 'TriX', 'turtle', 'nt'],
    help = 'Format of the final serialized RDF graph')

  parser.add_option('-m', '--ns', action='append',
    help = 'Register a namespace binding (QName prefix to a base URI)')

  parser.add_option('-r', '--rules', action='append',
    help = 'Append to the list of fact files to use to perform reasoning')
  parser.add_option('-i', '--inferred',
    help = 'URI to use for the graph containing any inferred triples')

  parser.set_defaults(
      xml=[], trix=[], n3=[], nt=[], rdfa=[], ns=[],
      output='n3'
    )

  (options, args) = parser.parse_args()

  store = plugin.get(RDFLIB_STORE, Store)()
  store.open(RDFLIB_CONNECTION)

  namespace_manager = NamespaceManager(Graph())
  for prefixDef in options.ns:
    prefix, uri = prefixDef.split('=')
    namespace_manager.bind(prefix, uri, override=False)

  factGraph = ConjunctiveGraph(store)
  for graphRef in options.xml:
    factGraph.parse(graphRef, publicID=Uri.OsPathToUri(graphRef),
                    format='xml')
  for graphRef in options.trix:
    factGraph.parse(graphRef, publicID=Uri.OsPathToUri(graphRef),
                    format='trix')
  for graphRef in options.n3:
    factGraph.parse(graphRef, publicID=Uri.OsPathToUri(graphRef),
                    format='n3')
  for graphRef in options.nt:
    factGraph.parse(graphRef, publicID=Uri.OsPathToUri(graphRef),
                    format='nt')
  for graphRef in options.rdfa:
    factGraph.parse(graphRef, publicID=Uri.OsPathToUri(graphRef),
                    format='rdfa')
  if options.stdin:
    factGraph.parse(sys.stdin, format=options.stdin)

  if options.inferred and len(options.rules) > 0:
    inferredURI = URIRef(options.inferred)
    ruleStore = N3RuleStore()
    ruleGraph = Graph(ruleStore)
    for ruleFile in options.rules:
      ruleGraph.parse(ruleFile, format='n3')
    tokenSet = generateTokenSet(factGraph)
    deltaGraph = Graph(store=factGraph.store,
                       identifier=inferredURI)
    network = ReteNetwork(ruleStore,
                          inferredTarget=deltaGraph)
    network.feedFactsToAdd(tokenSet)

  print(factGraph.serialize(destination=None, format=options.output,
                            base=None))
  store.rollback()
Example #53
def spo(request, doc_id):
    # Create and bind namespaces
    namespace_manager = NamespaceManager(Graph())
    for ns in namespaces_dict:
        namespace_manager.bind(ns, Namespace(namespaces_dict[ns]), override=False)

    # Load graph from the uploaded file

    if type(doc_id) == int or doc_id == "1":
        # If doc_id is an integer, a file was uploaded. If doc_id is the string "1", the example is parsed.

        graph = getrdf(doc_id)
    else:
        # If doc_id is not an integer (therefore a string), a SPARQL endpoint URL is being parsed
        graph = Graph()
        graph.parse(data=spo2rdfjson(doc_id), format="rdf-json")

    # Generate list of triples
    triple_list = []
    subject_list = []
    predicate_list = []
    object_list = []
    for s, p, o in graph:
        triple_list.append([s, p, o])
        subject_list.append(str(s).encode('utf-8', 'ignore'))
        predicate_list.append(str(p).encode('utf-8', 'ignore'))
        # print str(o).encode('utf-8', 'ignore')
        object_list.append(str(o).decode('utf-8', 'ignore'))

    subject_set = json.dumps(list(set(subject_list)))
    predicate_set = json.dumps(list(set(predicate_list)))
    object_set = json.dumps(list(set(object_list)))

    # Determine xml:base
    subject_base_test_set = {triple[0] for triple in triple_list}
    base_set = {subject[:subject.rfind("/")] for subject in subject_base_test_set}
    # If all subjects share the same substring-base, this substring-base is likely to be the xml:base.
    if len(base_set) == 1:
        base = str(list(base_set)[0]) + "/"
    else:
        base = ""

    # Insert namespaces into graph
    graph.namespace_manager = namespace_manager

    triple_fetcher_classes = get_triple_fetcher_classes()

    # Get the config files
    mapping_config = json.loads(open(SINDICE_CONFIG_MAPPING, 'r').read())
    query_config = json.loads(open(SINDICE_CONFIG_QUERY, 'r').read())
    import_label_lengths = dict()

    for tf_class in triple_fetcher_classes:
        mapping_length = len(mapping_config[tf_class])
        query_length = len(query_config[tf_class])

        if mapping_length >= query_length:
            import_label_lengths[tf_class] = mapping_length
        else:
            import_label_lengths[tf_class] = query_length

    import_config = {"query": query_config, "mapping": mapping_config, "import_label_lengths": import_label_lengths}

    import_config_dj = dict()
    for tf_class in triple_fetcher_classes:
        import_config_dj[tf_class] = dict()

    for tf_class in import_config_dj:
        import_config_dj[tf_class]["query"] = dict()
        import_config_dj[tf_class]["mapping"] = dict()


    for tf_class in import_config["query"]:
        i = 0
        for conf in import_config["query"][tf_class]:
            import_config_dj[tf_class]["query"][i] = {conf : import_config["query"][tf_class][conf]}
            i += 1

    for tf_class in import_config["mapping"]:
        i = 0
        for conf in import_config["mapping"][tf_class]:
            import_config_dj[tf_class]["mapping"][i] = {conf : import_config["mapping"][tf_class][conf]}
            i += 1

    print(import_config_dj)

    # Serialize graph to RDFJson
    rdfjson = graph.serialize(None, format="rdf-json")
    return render_to_response(
        'rdfedit/triples.html',
        {'rdfjson': rdfjson,
         'triple_list': triple_list,
         'subject_set': subject_set,
         'predicate_set': predicate_set,
         'object_set': object_set,
         'namespaces_dict': json.dumps(namespaces_dict),
         'base': base,
         "triple_fetcher_classes": json.dumps(triple_fetcher_classes),
         "import_config": json.dumps(import_config),
         "import_config_dj": import_config_dj},
        context_instance=RequestContext(request)
    )
Example #54
from rdflib.namespace import Namespace, NamespaceManager
from rdflib import Graph

#Our data namespace
D = Namespace('http://vivo.mydomain.edu/individual/')
#The VIVO namespace
VIVO = Namespace('http://vivoweb.org/ontology/core#')
#The VCARD namespace
VCARD = Namespace('http://www.w3.org/2006/vcard/ns#')
#The OBO namespace
OBO = Namespace('http://purl.obolibrary.org/obo/')
#The BIBO namespace
BIBO = Namespace('http://purl.org/ontology/bibo/')
#The FOAF namespace
FOAF = Namespace('http://xmlns.com/foaf/0.1/')
#The SKOS namespace
SKOS = Namespace('http://www.w3.org/2004/02/skos/core#')

ns_manager = NamespaceManager(Graph())
ns_manager.bind('d', D)
ns_manager.bind('vivo', VIVO)
ns_manager.bind('vcard', VCARD)
ns_manager.bind('obo', OBO)
ns_manager.bind('bibo', BIBO)
ns_manager.bind("foaf", FOAF)
ns_manager.bind("skos", SKOS)
Example #55
File: rdf.py Project: qood/vgmdb
	if self._spacious:
		self.write('\n')

turtle.TurtleSerializer.startDocument = turtle_patched_startDocument

# Namespaces in use
from rdflib.namespace import RDF, RDFS, XSD, DCTERMS, FOAF
BIO = Namespace("http://purl.org/vocab/bio/0.1/")
SCHEMA = Namespace("http://schema.org/")
MO = Namespace("http://purl.org/ontology/mo/")
EVENT = Namespace("http://purl.org/NET/c4dm/event.owl#")
TL = Namespace("http://purl.org/NET/c4dm/timeline.owl#")
ns = NamespaceManager(Graph())
namespaces = [(n.lower(), globals()[n]) for n in dict(globals()) if isinstance(globals()[n], Namespace)]
for name,namespace in namespaces:
	ns.bind(name, namespace)

def generate(config, type, data):
	global base
	base = config.BASE_URL
	func_name = 'generate_%s'%type
	if func_name in globals():
		graph = globals()['generate_%s'%type](config, data)
	else:
		doc = BNode()
		uri = base
		graph = Graph('IOMemory', doc)
	graph.namespace_manager = ns
	return graph

def link(link):
Example #56
from .resolver import get_URI_for_AILLA, get_URI_for_ANLA, get_URI_for_TLA, get_URI_for_Paradisec, get_URI_for_ELAR


#define general namespaces
QUEST = Namespace("http://zasquest.org/")
QUESTRESOLVER = Namespace("http://zasquest.org/resolver/")
DBPEDIA = Namespace("http://dbpedia.org/ontology/")
WIKIDATA = Namespace("http://www.wikidata.org/entity/")
LGR = Namespace("https://www.eva.mpg.de/lingua/resources/glossing-rules.php/")
LIGT = Namespace("http://purl.org/liodi/ligt/")
FLEX = Namespace("http://example.org/flex/")
NIF = Namespace("http://persistence.uni-leipzig.org/nlp2rdf/ontologies/nif-core#")

#define archive namespaces
ELD_NAMESPACE_MANAGER = NamespaceManager(Graph())
ELD_NAMESPACE_MANAGER.bind('dbpedia', DBPEDIA)
ELD_NAMESPACE_MANAGER.bind('wikidata', WIKIDATA)
ELD_NAMESPACE_MANAGER.bind('quest', QUEST) #for ontology
ELD_NAMESPACE_MANAGER.bind('QUESTRESOLVER', QUESTRESOLVER) #for the bridge for rewritable URLs
ELD_NAMESPACE_MANAGER.bind("rdfs", RDFS)
ELD_NAMESPACE_MANAGER.bind("dc", DC)
ELD_NAMESPACE_MANAGER.bind("lgr", LGR)
ELD_NAMESPACE_MANAGER.bind("ligt", LIGT)
ELD_NAMESPACE_MANAGER.bind("flex", FLEX)
ELD_NAMESPACE_MANAGER.bind("nif", NIF)

ARCHIVE_NAMESPACES = {
    'paradisec': Namespace("https://catalog.paradisec.org.au/collections/"),
    #'elarcorpus': Namespace("https://lat1.lis.soas.ac.uk/corpora/ELAR/"),
    'elarcorpus': Namespace("https://elar.soas.ac.uk/Record/"),
    'elarfiles': Namespace("https://elar.soas.ac.uk/resources/"),
Example #57
from rdflib import URIRef, Literal, BNode
from rdflib.namespace import RDF, SKOS, RDFS, OWL, Namespace, NamespaceManager, XSD

BF = Namespace("http://id.loc.gov/ontologies/bibframe/")
BDR = Namespace("http://purl.bdrc.io/resource/")
BDO = Namespace("http://purl.bdrc.io/ontology/core/")
TMP = Namespace("http://purl.bdrc.io/ontology/tmp/")
BDG = Namespace("http://purl.bdrc.io/graph/")
BDA = Namespace("http://purl.bdrc.io/admindata/")
ADM = Namespace("http://purl.bdrc.io/ontology/admin/")
MBBT = Namespace("http://mbingenheimer.net/tools/bibls/")
CBCT_URI = "https://dazangthings.nz/cbc/text/"
CBCT = Namespace(CBCT_URI)

NSM = NamespaceManager(rdflib.Graph())
NSM.bind("bdr", BDR)
NSM.bind("", BDO)
NSM.bind("bdg", BDG)
NSM.bind("bda", BDA)
NSM.bind("adm", ADM)
NSM.bind("skos", SKOS)
NSM.bind("rdf", RDF)
NSM.bind("cbct", CBCT)
NSM.bind("mbbt", MBBT)
NSM.bind("bf", BF)

K_TO_T = {}
K_TO_SKT = {}
T_TO_SKT = {}

with open('input/Taisho-K.csv', newline='') as csvfile:
Example #58
import sys
import os
import csv
import re
import rdflib
from rdflib import URIRef, Literal, BNode
from rdflib.namespace import RDF, SKOS, Namespace, NamespaceManager, XSD

BDR = Namespace("http://purl.bdrc.io/resource/")
BDO = Namespace("http://purl.bdrc.io/ontology/core/")
BDG = Namespace("http://purl.bdrc.io/graph/")
BDA = Namespace("http://purl.bdrc.io/admindata/")
ADM = Namespace("http://purl.bdrc.io/ontology/admin/")

NSM = NamespaceManager(rdflib.Graph())
NSM.bind("bdr", BDR)
NSM.bind("", BDO)
NSM.bind("bdg", BDG)
NSM.bind("bda", BDA)
NSM.bind("adm", ADM)
NSM.bind("skos", SKOS)

def linestordf(csvlines, graphname):
    """
    Returns an RDF dataset built from the given CSV lines
    """
    curidx = 0
    ds = rdflib.Dataset()
    g = ds.graph(BDG[graphname])
    g.namespace_manager = NSM
    i = 0
Example #59
import json
import sys
import urllib.parse
from rdflib import Namespace, Graph, RDFS  # , URIRef, BNode
from rdflib.namespace import NamespaceManager, DC  # , FOAF
from resolver import get_URI_for_AILLA, get_URI_for_ANLA, get_URI_for_TLA, get_URI_for_Paradisec, get_URI_for_ELAR

# define general namespaces
#QUEST = Namespace("http://zasquest.org/")
#QUESTRESOLVER = Namespace("http://zasquest.org/resolver/")
WIKIDATA = Namespace("https://www.wikidata.org/wiki/")

# define archive namespaces
NAMESPACE_MANAGER = NamespaceManager(Graph())
NAMESPACE_MANAGER.bind("wikidata", WIKIDATA)
#NAMESPACE_MANAGER.bind("quest", QUEST)  # for ontology
#NAMESPACE_MANAGER.bind(
#"QUESTRESOLVER", QUESTRESOLVER
#)  # for the bridge for rewritable URLs
NAMESPACE_MANAGER.bind("rdfs", RDFS)
NAMESPACE_MANAGER.bind("dc", DC)

ARCHIVE_NAMESPACES = {
    'paradisec':
    Namespace("https://catalog.paradisec.org.au/collections/"),
    #'elarcorpus': Namespace("https://lat1.lis.soas.ac.uk/corpora/ELAR/"),
    'elarcorpus':
    Namespace("https://elar.soas.ac.uk/Record/"),
    'elarfiles':
    Namespace("https://elar.soas.ac.uk/resources/"),
Example #60
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.
"""

# # Sample ontology on which we test grontocrawler
#
# Note that it does contain some mistakes
#
from rdflib.extras.infixowl import (Class, Property, CastClass, some)
from rdflib import Namespace, Graph, OWL, Literal
from rdflib.namespace import NamespaceManager

ns = Namespace('http://plumdeq.xyz/ontologies/hypothesis/')
ns_manager = NamespaceManager(Graph())
ns_manager.bind('hypo', ns, override=False)
ns_manager.bind('owl', OWL, override=False)
g = Graph()
g.namespace_manager = ns_manager

# ## Main classes
#
con = Class(
    ns.Continuant,
    graph=g,
    comment=Literal('Material entity. Examples: cells, molecules, joints'))
occ = Class(
    ns.Occurent,
    graph=g,
    comment=Literal('Occurring processes, which start and end at some point'))
condition = Class(