class ParserTestCase(unittest.TestCase):
    """Verify that rdf:about="http://example.org#" keeps its trailing hash."""

    backend = 'default'
    path = 'store'

    def setUp(self):
        self.graph = Graph(store=self.backend)
        self.graph.open(self.path)

    def tearDown(self):
        self.graph.close()

    def testNoPathWithHash(self):
        g = self.graph
        g.parse(data="""\
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<rdf:RDF
  xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
  xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#"
>
  <rdfs:Class rdf:about="http://example.org#">
    <rdfs:label>testing</rdfs:label>
  </rdfs:Class>
</rdf:RDF>
""", publicID="http://example.org")

        subject = URIRef("http://example.org#")
        # The label must be attached to the hash-terminated subject URI.
        self.assertEqual(g.value(subject, RDFS.label), Literal("testing"))
        self.assertEqual(g.value(subject, RDF.type), RDFS.Class)
class PychinkoTestCase(unittest.TestCase):
    """Run the Pychinko interpreter over log:implies rules found in test/a.n3."""

    backend = "default"
    tmppath = None

    def setUp(self):
        self.g = Graph(store=self.backend)
        self.tmppath = mkdtemp()
        self.g.open(configuration=self.tmppath)
        self.g.parse("test/a.n3", format="n3")

    def tearDown(self):
        self.g.close()
        # Fix: `shutil.rmtree(tmppath)` referenced an unbound global (NameError
        # at teardown); the temp dir created in setUp() lives on the instance.
        shutil.rmtree(self.tmppath)

    def testPychinko(self):
        # Each log:implies triple yields one rule: LHS patterns => RHS patterns.
        rules = []
        for s, p, o in self.g.triples((None, LOG.implies, None)):
            lhs = list(patterns(s))
            rhs = list(patterns(o))
            rules.append(terms.Rule(lhs, rhs, (s, p, o)))
        interp = Interpreter(rules)
        f = Graph()
        f.parse("http://eikeon.com/")
        source = f
        # NOTE(review): this immediately overrides the fetched graph with the
        # local one — presumably intentional (the fetch is a smoke test); confirm.
        source = self.g
        interp.addFacts(set(facts(source)), initialSet=True)
        interp.run()
class PostgreSQLStoreTests(unittest.TestCase):
    """N3 round-trip against a PostgreSQL-backed store, with disk cleanup."""

    storetest = True
    store_name = "PostgreSQL"
    path = configString
    create = True

    def setUp(self):
        self.graph = Graph(store=self.store_name)
        self.graph.open(self.path, create=self.create)

    def tearDown(self):
        self.graph.destroy(self.path)
        self.graph.close()
        import os
        # Best-effort removal of any on-disk residue left by the store.
        if hasattr(self, "path") and self.path is not None:
            if os.path.exists(self.path):
                if os.path.isdir(self.path):
                    for entry in os.listdir(self.path):
                        os.unlink(self.path + "/" + entry)
                    os.rmdir(self.path)
                elif ":" not in self.path:
                    os.unlink(self.path)
                else:
                    os.remove(self.path)

    def test_PostgreSQL_testN3_store(self):
        testN3Store("PostgreSQL", configString)
class Processor(object):
    """Consume events from a stream client and accumulate them in an RDF store."""

    def __init__(self, stream_urls):
        # Client pushes batches of events into _handle_event; errors go to
        # _handle_error. separate_events=False means callbacks get event lists.
        self.client = client.Client(stream_urls,
                                    event_callback=self._handle_event,
                                    error_callback=self._handle_error,
                                    separate_events=False)
        # Sleepycat-backed graph; the identifier is the slog namespace URI.
        self.triple_store = Graph('Sleepycat',
                                  'http://www.it.uc3m.es/jaf/ns/slog/db')
        # On-disk directory for the Sleepycat store, opened in start().
        self.db_dir = 'dbdir'

    def start(self, loop=False):
        # Open the store before the client begins delivering events.
        self.triple_store.open(self.db_dir)
        self.client.start(loop=loop)

    def stop(self):
        self.triple_store.close()
        self.client.stop()

    def _handle_error(self, message, http_error=None):
        # Errors are deliberately ignored (best-effort consumer).
        pass

    def _handle_event(self, evs):
        print('Received {} events.'.format(len(evs)))
        for event in evs:
            # event.body is a graph; += merges its triples into the store.
            self.triple_store += event.body
        print(len(self.triple_store))
class ParserTestCase(unittest.TestCase):
    """Verify that rdf:about="http://example.org#" keeps its trailing hash."""

    backend = 'default'
    path = 'store'

    def setUp(self):
        self.graph = Graph(store=self.backend)
        self.graph.open(self.path)

    def tearDown(self):
        self.graph.close()

    def testNoPathWithHash(self):
        g = self.graph
        g.parse(data="""\
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<rdf:RDF
  xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
  xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#"
>
  <rdfs:Class rdf:about="http://example.org#">
    <rdfs:label>testing</rdfs:label>
  </rdfs:Class>
</rdf:RDF>
""", publicID="http://example.org")
        subject = URIRef("http://example.org#")
        label = g.value(subject, RDFS.label)
        # Fix: assertEquals is a deprecated alias (removed in Python 3.12);
        # use assertEqual, matching the sibling copy of this test case.
        self.assertEqual(label, Literal("testing"))
        type = g.value(subject, RDF.type)
        self.assertEqual(type, RDFS.Class)
class PychinkoTestCase(unittest.TestCase):
    """Run the Pychinko interpreter over log:implies rules found in test/a.n3."""

    backend = 'default'
    tmppath = None

    def setUp(self):
        self.g = Graph(store=self.backend)
        self.tmppath = mkdtemp()
        self.g.open(configuration=self.tmppath)
        self.g.parse("test/a.n3", format="n3")

    def tearDown(self):
        self.g.close()
        # Fix: `shutil.rmtree(tmppath)` referenced an unbound global (NameError
        # at teardown); the temp dir created in setUp() lives on the instance.
        shutil.rmtree(self.tmppath)

    def testPychinko(self):
        rules = []
        # Each log:implies triple yields one rule: LHS patterns => RHS patterns.
        for s, p, o in self.g.triples((None, LOG.implies, None)):
            lhs = list(patterns(s))
            rhs = list(patterns(o))
            rules.append(terms.Rule(lhs, rhs, (s, p, o)))
        interp = Interpreter(rules)
        f = Graph()
        f.parse("http://eikeon.com/")
        source = f
        # NOTE(review): this immediately overrides the fetched graph with the
        # local one — presumably intentional (the fetch is a smoke test); confirm.
        source = self.g
        interp.addFacts(set(facts(source)), initialSet=True)
        interp.run()
def test_different_uris():
    """Open the graph with a URI that differs from the store identifier."""
    proxy = ProxyStore(identifier="http://localhost:1234/foo")
    g = Graph(store=proxy)
    g.open({PS_CONFIG_URI: "http://localhost:1234/foo/group1/"})
    g.close()
def test_no_uri():
    """Neither __init__() nor open() receives a URI."""
    proxy = ProxyStore()
    g = Graph(store=proxy)
    g.open({})
    g.close()
def test_05_persist(self):
    """Attach a SWIStore graph to test.db, load data, leave it on disk."""
    g = Graph("SWIStore", identifier="test")
    assert not g.store.attached
    g.open("test.db")
    assert g.store.attached
    g.parse(cofog_test)
    # Confirm the database file materialized on disk.
    stat("test.db")
    g.close()
def test_identifier_no_open():
    """The identifier alone, without an explicit open(), must suffice."""
    proxy = ProxyStore(identifier="http://localhost:1234/foo")
    g = Graph(store=proxy)
    gs = g.serialize()
    g.close()
def test_no_uri_no_open():
    """No URI anywhere and no open(): using the graph should trigger an error."""
    proxy = ProxyStore()
    g = Graph(store=proxy)
    gs = g.serialize()
    g.close()
def test_uri_with_wrong_credentials_in_init():
    """Wrong credentials supplied through the __init__() configuration."""
    proxy = ProxyStore(
        configuration={PS_CONFIG_USER: "******",
                       PS_CONFIG_PWD: "wrong-pwd"},
        identifier="http://localhost:1234/foo")
    g = Graph(store=proxy)
    g.close()
def test_identifier_no_open():
    """The identifier alone, without an explicit open(), must suffice."""
    proxy = ProxyStore(identifier="http://localhost:1234/foo/")
    g = Graph(store=proxy)
    gs = g.serialize()
    g.close()
def test_different_uris():
    """A mismatch between identifier and open() URI raises StoreIdentifierError."""
    with assert_raises(StoreIdentifierError):
        proxy = ProxyStore(identifier="http://localhost:1234/foo/")
        g = Graph(store=proxy)
        g.open({PS_CONFIG_URI: "http://localhost:1234/foo/group1/"})
        g.close()
def test_no_uri():
    """No URI in __init__() nor in open() raises StoreIdentifierError."""
    with assert_raises(StoreIdentifierError):
        proxy = ProxyStore()
        g = Graph(store=proxy)
        g.open({})
        g.close()
def test_no_uri_no_open():
    """Using an unconfigured, unopened graph must raise AssertionError."""
    with assert_raises(AssertionError):
        proxy = ProxyStore()
        g = Graph(store=proxy)
        gs = g.serialize()
        g.close()
def test_uri_with_good_credentials_in_init():
    """Valid credentials passed via a pre-configured httplib2.Http object."""
    cx = httplib2.Http()
    cx.add_credentials("user", "pwd")
    proxy = ProxyStore(configuration={PS_CONFIG_HTTP_CX: cx},
                       identifier="http://localhost:1234/foo/")
    g = Graph(store=proxy)
    g.close()
def test_06_retract(self):
    """Removing all rdfs:label triples leaves none behind."""
    g = Graph("SWIStore", identifier="test")
    g.open("test.db")
    label_pattern = (None, RDFS.label, None)
    assert len(list(g.triples(label_pattern))) > 0
    g.remove(label_pattern)
    assert len(list(g.triples(label_pattern))) == 0
    g.store.unload(g)
    g.close()
def test_uri_with_good_credentials_in_open():
    """Valid credentials passed through the open() configuration."""
    proxy = ProxyStore(identifier="http://localhost:1234/foo")
    g = Graph(store=proxy)
    g.open(configuration={PS_CONFIG_USER: "******",
                          PS_CONFIG_PWD: "pwd"})
    g.close()
def test_no_identifier_uri_in_open():
    """No identifier in __init__(); the URI arrives via open()."""
    proxy = ProxyStore()
    g = Graph(store=proxy)
    g.open({PS_CONFIG_URI: "http://localhost:1234/foo"})
    gs = g.serialize()
    g.close()
def test_identifier_no_configuration():
    """Identifier in __init__(); open() gets an empty configuration."""
    proxy = ProxyStore(identifier="http://localhost:1234/foo/")
    g = Graph(store=proxy)
    g.open({})
    gs = g.serialize()
    g.close()
def test_no_identifier_uri_in_open():
    """No identifier in __init__(); the URI arrives via open()."""
    proxy = ProxyStore()
    g = Graph(store=proxy)
    g.open({PS_CONFIG_URI: "http://localhost:1234/foo/"})
    gs = g.serialize()
    g.close()
def test_identifier_no_configuration():
    """Identifier in __init__(); open() gets an empty configuration."""
    proxy = ProxyStore(identifier="http://localhost:1234/foo")
    g = Graph(store=proxy)
    g.open({})
    gs = g.serialize()
    g.close()
class TestSPARQLStoreGraphCore(unittest.TestCase):
    """Open/close smoke test against the DBpedia SPARQL endpoint."""

    store_name = 'SPARQLStore'
    path = "http://dbpedia.org/sparql"
    storetest = True
    create = False

    def setUp(self):
        self.graph = Graph(store="SPARQLStore")
        self.graph.open(self.path, create=self.create)
        # The remote endpoint should expose at least one namespace.
        namespaces = list(self.graph.namespaces())
        assert namespaces, namespaces

    def tearDown(self):
        self.graph.close()
class MySQLStoreTests(unittest.TestCase):
    """Create/destroy lifecycle for the MySQL-backed store."""

    storetest = True
    store_name = "MySQL"
    path = configString
    create = True
    identifier = "rdflib_test"

    def setUp(self):
        self.graph = Graph(store=self.store_name)
        # Wipe any leftovers before (re)creating the store.
        self.graph.destroy(self.path)
        self.graph.open(self.path, create=self.create)

    def tearDown(self):
        self.graph.destroy(self.path)
        self.graph.close()
class PostgreSQLStoreTests(unittest.TestCase):
    """N3 round-trip against a PostgreSQL-backed store."""

    storetest = True
    store_name = "PostgreSQL"
    path = configString
    create = True

    def setUp(self):
        self.graph = Graph(store=self.store_name)
        self.graph.open(self.path, create=self.create)

    def tearDown(self):
        self.graph.destroy(self.path)
        self.graph.close()

    def test_PostgreSQL_testN3_store(self):
        testN3Store('PostgreSQL', configString)
class TestSPARQLStoreGraphCore(unittest.TestCase):
    """Open/close smoke test against the DBpedia SPARQL endpoint."""

    store_name = 'SPARQLStore'
    path = "http://dbpedia.org/sparql"
    storetest = True
    create = False

    def setUp(self):
        self.graph = Graph(store="SPARQLStore")
        self.graph.open(self.path, create=self.create)
        # The remote endpoint should expose at least one namespace.
        namespaces = list(self.graph.namespaces())
        assert namespaces, namespaces

    def tearDown(self):
        self.graph.close()

    def test(self):
        print("Done")
class SeqTestCase(unittest.TestCase):
    """rdf:Seq container access through Graph.seq()."""

    backend = "default"
    path = "store"

    def setUp(self):
        self.store = Graph(store=self.backend)
        self.store.open(self.path)
        self.store.parse(data=s, format="xml")

    def tearDown(self):
        self.store.close()

    def testSeq(self):
        seq_items = self.store.seq(URIRef("http://example.org/Seq"))
        self.assertEqual(len(seq_items), 6)
        # Negative and positional indexing into the Seq container.
        self.assertEqual(seq_items[-1], URIRef("http://example.org/six"))
        self.assertEqual(seq_items[2], URIRef("http://example.org/three"))
        # Serialization must not raise.
        self.store.serialize()
class TypeCheckCase(unittest.TestCase):
    """None terms in add() must raise the matching *TypeError subclass."""

    # TODO: until we decide if we want to add type checking back to rdflib
    unstable = True
    backend = "default"
    path = "store"

    def setUp(self):
        # NOTE(review): Graph(backend=...) — sibling cases pass store=...;
        # confirm which keyword this rdflib version expects.
        self.store = Graph(backend=self.backend)
        self.store.open(self.path)

    def tearDown(self):
        self.store.close()

    def testSubjectTypeCheck(self):
        self.assertRaises(SubjectTypeError, self.store.add, (None, foo, foo))

    def testPredicateTypeCheck(self):
        self.assertRaises(PredicateTypeError, self.store.add, (foo, None, foo))

    def testObjectTypeCheck(self):
        self.assertRaises(ObjectTypeError, self.store.add, (foo, foo, None))
class TypeCheckCase(unittest.TestCase):
    """None terms in add() must raise the matching *TypeError subclass."""

    # TODO: until we decide if we want to add type checking back to rdflib
    unstable = True
    backend = 'default'
    path = 'store'

    def setUp(self):
        # NOTE(review): Graph(backend=...) — sibling cases pass store=...;
        # confirm which keyword this rdflib version expects.
        self.store = Graph(backend=self.backend)
        self.store.open(self.path)

    def tearDown(self):
        self.store.close()

    def testSubjectTypeCheck(self):
        self.assertRaises(SubjectTypeError, self.store.add, (None, foo, foo))

    def testPredicateTypeCheck(self):
        self.assertRaises(PredicateTypeError, self.store.add, (foo, None, foo))

    def testObjectTypeCheck(self):
        self.assertRaises(ObjectTypeError, self.store.add, (foo, foo, None))
class RDFTestCase(unittest.TestCase):
    """Round-trip a small FOAF graph through the pretty-xml serializer."""

    backend = 'default'
    path = 'store'

    def setUp(self):
        self.store = Graph(store=self.backend)
        self.store.open(self.path)
        # Fix: the Dublin Core prefix was bound to the malformed URI
        # "http://http://purl.org/dc/elements/1.1/" (doubled scheme).
        self.store.bind("dc", "http://purl.org/dc/elements/1.1/")
        self.store.bind("foaf", "http://xmlns.com/foaf/0.1/")

    def tearDown(self):
        self.store.close()

    def addDonna(self):
        self.donna = donna = BNode()
        self.store.add((donna, RDF.type, FOAF["Person"]))
        self.store.add((donna, FOAF["nick"], Literal("donna")))
        self.store.add((donna, FOAF["name"], Literal("Donna Fales")))

    def testRDFXML(self):
        self.addDonna()
        g = Graph()
        g.parse(data=self.store.serialize(format="pretty-xml"))
        # Fix: assertEquals is a deprecated alias (removed in Python 3.12).
        self.assertEqual(self.store.isomorphic(g), True)
class GraphTest(unittest.TestCase):
    """Basic add/remove/iterate operations on a Graph."""

    backend = 'default'
    path = 'store'

    def setUp(self):
        self.store = Graph(store=self.backend)
        self.store.open(self.path)
        # Seed one triple so testRemove has something to delete.
        self.remove_me = (BNode(), RDFS.label, Literal("remove_me"))
        self.store.add(self.remove_me)

    def tearDown(self):
        self.store.close()

    def testAdd(self):
        node = BNode()
        self.store.add((node, RDFS.label, Literal("foo")))

    def testRemove(self):
        self.store.remove(self.remove_me)
        # Wildcard removal clears the remainder of the graph.
        self.store.remove((None, None, None))

    def testTriples(self):
        # Iterating the graph must not raise.
        for triple in self.store:
            pass
class GraphTest(unittest.TestCase):
    """Basic add/remove/iterate operations on a Graph."""

    backend = "default"
    path = "store"

    def setUp(self):
        self.store = Graph(store=self.backend)
        self.store.open(self.path)
        # Seed one triple so testRemove has something to delete.
        self.remove_me = (BNode(), RDFS.label, Literal("remove_me"))
        self.store.add(self.remove_me)

    def tearDown(self):
        self.store.close()

    def testAdd(self):
        node = BNode()
        self.store.add((node, RDFS.label, Literal("foo")))

    def testRemove(self):
        self.store.remove(self.remove_me)
        # Wildcard removal clears the remainder of the graph.
        self.store.remove((None, None, None))

    def testTriples(self):
        # Iterating the graph must not raise.
        for triple in self.store:
            pass
def get_current_version(self):
    """ Get Virtuoso Database Graph Current Version """
    # Nested SELECT: of all owl:versionInfo facts recorded in the migration
    # graph for this product, keep only the most recently "commited" one.
    query = """\
prefix owl: <http://www.w3.org/2002/07/owl#>
prefix xsd: <http://www.w3.org/2001/XMLSchema#>
select distinct ?version ?origen FROM <%(m_graph)s>
{{
 select distinct ?version ?origen ?data FROM <%(m_graph)s>
 where {?s owl:versionInfo ?version;
 <%(m_graph)scommited> ?data;
 <%(m_graph)sproduto> "%(v_graph)s";
 <%(m_graph)sorigen> ?origen.}
 ORDER BY desc(?data) LIMIT 1
}}""" % {'m_graph': self.migration_graph,
         'v_graph': self.__virtuoso_graph}
    # Query the SPARQL endpoint directly (no local store).
    graph = Graph(store="SPARQLStore")
    graph.open(self.__virtuoso_endpoint, create=False)
    graph.store.baseURI = self.__virtuoso_endpoint
    ns = list(graph.namespaces())
    assert len(ns) > 0, ns
    res = graph.query(query)
    graph.close()
    nroResults = len(res)
    if nroResults > 0:
        # Force the projected variable order before unpacking the first row.
        res.vars = ['version', 'origen']
        # NOTE(review): iter(res).next() is Python-2 syntax; on Python 3 this
        # would need next(iter(res)) — confirm the target interpreter.
        versao, origem = iter(res).next()
        # The endpoint may return the string 'None' for an absent version.
        versao = None if str(versao) == 'None' else str(versao)
        return versao, str(origem)
    else:
        return None, None
class KnowledgeGraph:
    """ Knowledge Graph Class
    A wrapper around an imported rdflib.Graph object with convenience functions
    """
    # Class-level defaults; __init__ rebinds both on every instance.
    graph = None
    _property_distribution = {}

    def __init__(self, graph=None):
        self.logger = logging.getLogger()
        self.logger.debug("Initiating Knowledge Graph")
        if graph is not None:
            if type(graph) is Graph:
                # Wrap an already-loaded rdflib Graph as-is.
                self.graph = graph
            elif type(graph) is str:
                # A single path: parse that one file.
                self.graph = self._read([graph])
            elif type(graph) is list:
                # A list of paths: merge them into one graph.
                self.graph = self._read(graph)
            else:
                raise TypeError(":: Wrong input type: {}; requires path to RDF"
                                " graph or rdflib.graph.Graph object".format(type(graph)))
        else:
            self.graph = Graph()

        # Predicate -> number of occurrences across the whole graph.
        self._property_distribution = Counter(self.graph.predicates())
        self.logger.debug("Knowledge Graph ({} facts) succesfully imported".format(len(self.graph)))

    def _read(self, paths=None):
        # Parse one or more RDF files (optionally gzipped) into a fresh graph.
        graph = Graph()
        for path in paths:
            assert is_readable(path)
            if not is_gzip(path):
                graph.parse(path, format=guess_format(path))
            else:
                self.logger.debug("Input recognized as gzip file")
                # Strip the ".gz" suffix before guessing the RDF format.
                with gzip.open(path, 'rb') as f:
                    graph.parse(f, format=guess_format(path[:-3]))

        return graph

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        # Tear down the backing store when leaving a `with` block.
        self.graph.destroy("store")
        self.graph.close(True)

    def __len__(self):
        return len(self.graph)

    ### Generators ###

    def atoms(self, separate_literals=True):
        # Yield every distinct subject/object term; literals are optionally
        # wrapped in UniqueLiteral so identical text under different (s, p)
        # contexts stays distinct.
        self.logger.debug("Yielding atoms (separated literals: {})".format(
            separate_literals))
        seen = set()
        for s, p, o in self.graph.triples((None, None, None)):
            for atom in (s, o):
                if separate_literals and isinstance(atom, Literal):
                    atom = self.UniqueLiteral(s, p, atom)
                if atom in seen:
                    continue
                seen.add(atom)

                yield atom

    def non_terminal_atoms(self):
        # Atoms that appear in the subject position (i.e. have outgoing edges).
        self.logger.debug("Yielding non-terminal atoms")
        for atom in frozenset(self.graph.subjects()):
            yield(atom)

    def terminal_atoms(self):
        # Objects that never occur as a subject (leaves of the graph).
        self.logger.debug("Yielding terminal atoms")
        non_terminal_atoms = list(self.non_terminal_atoms())
        for atom in list(self.graph.objects()):
            if atom in non_terminal_atoms:
                continue

            yield(atom)

    def attributes(self):
        # All literal-valued objects.
        self.logger.debug("Yielding attributes")
        for obj in self.graph.objects():
            if type(obj) is Literal:
                yield(obj)

    def entities(self, omit_blank_nodes=False):
        # Non-literal atoms, optionally skipping blank nodes.
        self.logger.debug("Yielding entities")
        for res in self.atoms():
            if (type(res) is Literal or
               (omit_blank_nodes and type(res) is BNode)):
                continue

            yield(res)

    def objecttype_properties(self):
        # Predicates that link to at least one non-literal object.
        attributes = frozenset(self.attributes())
        self.logger.debug("Yielding OT predicates")
        for p in self.graph.predicates():
            if len(set(self.graph.objects(None, p))-attributes) <= 0:
                # p is only used with a literal as object
                continue

            yield(p)

    def datatype_properties(self):
        # Predicates used exclusively with literal objects (complement of OT).
        objecttype_properties = set(self.objecttype_properties())
        self.logger.debug("Yielding DT predicates")
        for p in self.graph.predicates():
            if p in objecttype_properties:
                continue

            yield(p)

    def properties(self):
        # All predicates (with repetitions, as emitted by rdflib).
        self.logger.debug("Yielding properties")
        for p in self.graph.predicates():
            yield(p)

    def triples(self, triple=(None, None, None), separate_literals=True):
        # Pattern-matched triple iterator; literal objects are optionally
        # wrapped in UniqueLiteral (see atoms()).
        self.logger.debug("Yielding triples (triple {})".format(triple))
        for s, p, o in self.graph.triples(triple):
            if separate_literals and isinstance(o, Literal):
                o = self.UniqueLiteral(s, p, o)

            yield s, p, o

    ## Statistics

    def property_frequency(self, property=None):
        # Whole distribution when no property given; a single count otherwise.
        # NOTE: returns None for a property never seen in the graph.
        if property is None:
            return self._property_distribution
        elif property in self._property_distribution:
            return self._property_distribution[property]

    def attribute_frequency(self, property, limit=None):
        # Most common attribute values for the given property.
        attribute_freq = Counter(self.graph.objects(None, property))
        if limit is None:
            return attribute_freq.most_common()
        else:
            return attribute_freq.most_common(limit)

    ## Operators

    def sample(self, strategy=None, **kwargs):
        """ Sample this graph using the given strategy
        returns a KnowledgeGraph instance
        """
        if strategy is None:
            raise ValueError('Strategy cannot be left undefined')

        self.logger.debug("Sampling graph")
        return strategy.sample(self, **kwargs)

    def quickSort(self, lst):
        """Needed to sort deterministically when using UniqueLiterals"""
        # Classic three-way quicksort, comparing items by str() so that
        # UniqueLiterals (whose __lt__ is context-sensitive) order stably.
        less = list()
        pivotList = list()
        more = list()
        if len(lst) <= 1:
            return lst

        pivot = lst[0]
        for member in lst:
            if str(member) < str(pivot):
                less.append(member)
            elif str(member) > str(pivot):
                more.append(member)
            else:
                pivotList.append(member)

        less = self.quickSort(less)
        more = self.quickSort(more)

        return less + pivotList + more

    class UniqueLiteral(Literal):
        # literal with unique hash, irrespective of content
        def __new__(cls, s, p, o):
            # Copy the literal's text/language/datatype, remembering the
            # (subject, predicate) context it came from.
            self = super().__new__(cls, str(o), o.language, o.datatype, normalize=None)
            self.s = str(s)
            self.p = str(p)

            return self

        def __hash__(self):
            # Hash over context + text + language/datatype so equal text in
            # different triples hashes differently.
            base = self.s + self.p + str(self)
            for attr in [self.language, self.datatype]:
                if attr is not None:
                    base += str(attr)

            return hash(base)

        def __eq__(self, other):
            if type(other) is not type(self):
                return False

            return hash(repr(self)) == hash(repr(other))

        # NOTE(review): functools.total_ordering is documented as a *class*
        # decorator; applied to a method it does not generate the other
        # comparison methods — confirm intent.
        @total_ordering
        def __lt__(self, other):
            if type(other) is not type(self):
                return False
            if str(self) < str(other):
                return True
            if self.s < other.s:
                return True
            if self.p < other.p:
                return True

            return False
class MetadataCache(with_metaclass(abc.ABCMeta, object)):
    """Super-class for all metadata cache implementations.

    """
    def __init__(self, store, cache_uri):
        self.store = store
        self.cache_uri = cache_uri
        self.graph = Graph(store=self.store, identifier=_DB_IDENTIFIER)
        # Tracks whether open() succeeded; cleared again by close().
        self.is_open = False
        self.catalog_source = _GUTENBERG_CATALOG_URL

    @property
    def exists(self):
        """Detect if the cache exists.

        """
        return os.path.exists(self._local_storage_path)

    def open(self):
        """Opens an existing cache.

        """
        try:
            self.graph.open(self.cache_uri, create=False)
            self._add_namespaces(self.graph)
            self.is_open = True
        except Exception:
            # Any failure while opening is surfaced as a single cache error.
            raise InvalidCacheException('The cache is invalid or not created')

    def close(self):
        """Closes an opened cache.

        """
        self.graph.close()
        self.is_open = False

    def delete(self):
        """Delete the cache.

        """
        self.close()
        remove(self._local_storage_path)

    def populate(self):
        """Populates a new cache.

        """
        if self.exists:
            raise CacheAlreadyExistsException('location: %s' % self.cache_uri)

        self._populate_setup()
        # Close the graph when done, even if population fails midway.
        with closing(self.graph):
            with self._download_metadata_archive() as metadata_archive:
                for fact in self._iter_metadata_triples(metadata_archive):
                    self._add_to_graph(fact)

    def _add_to_graph(self, fact):
        """Adds a (subject, predicate, object) RDF triple to the graph.

        """
        self.graph.add(fact)

    def _populate_setup(self):
        """Executes operations necessary before the cache can be populated.

        """
        # Subclasses may override; the base implementation is a no-op.
        pass

    def refresh(self):
        """Refresh the cache by deleting the old one and creating a new one.

        """
        if self.exists:
            self.delete()
        self.populate()
        self.open()

    @property
    def _local_storage_path(self):
        """Returns a path to the on-disk structure of the cache.

        """
        return self.cache_uri

    @staticmethod
    def _add_namespaces(graph):
        """Function to ensure that the graph always has some specific namespace
        aliases set.

        """
        graph.bind('pgterms', PGTERMS)
        graph.bind('dcterms', DCTERMS)

    @contextmanager
    def _download_metadata_archive(self):
        """Makes a remote call to the Project Gutenberg servers and downloads
        the entire Project Gutenberg meta-data catalog. The catalog describes
        the texts on Project Gutenberg in RDF. The function returns a
        file-pointer to the catalog.

        """
        # delete=False so the file survives the `with`; it is removed manually
        # after the caller's block finishes.
        with tempfile.NamedTemporaryFile(delete=False) as metadata_archive:
            shutil.copyfileobj(urlopen(self.catalog_source), metadata_archive)
        yield metadata_archive.name
        remove(metadata_archive.name)

    @classmethod
    def _metadata_is_invalid(cls, fact):
        """Determines if the fact is not well formed.

        """
        # A URIRef containing a space cannot be a valid IRI.
        return any(isinstance(token, URIRef) and ' ' in token
                   for token in fact)

    @classmethod
    def _iter_metadata_triples(cls, metadata_archive_path):
        """Yields all meta-data of Project Gutenberg texts contained in the
        catalog dump.

        """
        # Only per-text catalog entries (pg<id>.rdf) are parsed.
        pg_rdf_regex = re.compile(r'pg\d+.rdf$')
        with closing(tarfile.open(metadata_archive_path)) as metadata_archive:
            for item in metadata_archive:
                if pg_rdf_regex.search(item.name):
                    # Parsing emits noisy log records; silence them per entry.
                    with disable_logging():
                        extracted = metadata_archive.extractfile(item)
                        graph = Graph().parse(extracted)
                    for fact in graph:
                        if cls._metadata_is_invalid(fact):
                            logging.info('skipping invalid triple %s', fact)
                        else:
                            yield fact
class StoreTestCase(unittest.TestCase):
    """
    Test case for testing store performance... probably should be
    something other than a unit test... but for now we'll add it as a
    unit test.
    """
    # NOTE(review): Python 2 code (print statements); keep the interpreter in mind.
    store = 'default'
    tmppath = None
    configString = os.environ.get("DBURI", "dburi")

    def setUp(self):
        # Disable GC during timing runs so collections don't skew results.
        self.gcold = gc.isenabled()
        gc.collect()
        gc.disable()
        self.graph = Graph(store=self.store)
        if self.store == "MySQL":
            # from test.mysql import configString
            from rdflib.store.MySQL import MySQL
            path = self.configString
            MySQL().destroy(path)
        else:
            self.tmppath = mkdtemp()
        self.graph.open(self.tmppath, create=True)
        # Reference data set used by _testInput.
        self.input = input = Graph()
        input.parse("http://eikeon.com")

    def tearDown(self):
        self.graph.close()
        if self.gcold:
            gc.enable()
        # Release the graph before removing its backing directory.
        del self.graph
        shutil.rmtree(self.tmppath)

    def testTime(self):
        # Run both benchmarks `number` times and print per-run timings.
        number = 1
        print self.store
        print "input:",
        for i in itertools.repeat(None, number):
            self._testInput()
        print "random:",
        for i in itertools.repeat(None, number):
            self._testRandom()
        print "."

    def _testRandom(self):
        # Time adding as many random triples as the reference graph holds.
        number = len(self.input)
        store = self.graph

        def add_random():
            s = random_uri()
            p = random_uri()
            o = random_uri()
            store.add((s, p, o))

        it = itertools.repeat(None, number)
        t0 = time()
        for _i in it:
            add_random()
        t1 = time()
        print "%.3g" % (t1 - t0),

    def _testInput(self):
        # Time bulk-copying the reference graph into the store.
        number = 1
        store = self.graph

        def add_from_input():
            for t in self.input:
                store.add(t)

        it = itertools.repeat(None, number)
        t0 = time()
        for _i in it:
            add_from_input()
        t1 = time()
        print "%.3g" % (t1 - t0),
class SWAnalyzer:
    # Analyzes a semantic-web dataset through a SPARQL endpoint or a local
    # rdflib store: triple/class/property counts, URI patterns, link sets.
    # NOTE(review): Python 2 code (print statements, urllib2).

    def __init__(self, sparql_endpoint, identifier, configstring, store=None,
                 proxy=None, subprocess=True):
        self.sparql_endpoint = sparql_endpoint
        self.store = store
        if store is None:
            # Remote mode: query the endpoint directly.
            print "Creating SPARQLStore for %s" % self.sparql_endpoint
            store = SPARQLStore(self.sparql_endpoint)
            self.graph = Graph(store)
        else:
            # Local mode: a concrete rdflib store with its own identifier.
            self.identifier = URIRef(identifier)
            self.configstring = configstring
            self.graph = Graph(store, identifier=self.identifier)
        self.subprocess = subprocess
        if proxy != None:
            # Install a process-wide HTTP proxy for all urllib2 requests.
            print "Initilizing proxy..."
            proxy = urllib2.ProxyHandler({"http": urlparse(proxy).netloc})
            opener = urllib2.build_opener(proxy)
            urllib2.install_opener(opener)

    # @abc.abstractmethod
    def open(self):
        # Only local stores need opening; SPARQLStore is connectionless.
        if self.store is not None:
            self.graph.open(self.configstring, create=True)

    def close(self):
        if self.store is not None:
            self.graph.destroy(self.configstring)
        self.graph.close()

    def load_graph(self):
        # Cache the dataset's URI prefix for later regex-based link queries.
        self.uri_pattern = self.get_uri_pattern()[1]

    def get_triples(self):
        query = "SELECT DISTINCT * { ?s ?p ?o }"
        qres = self.graph.query(query)
        return qres.result

    def get_triples_count(self):
        query = "SELECT (COUNT(*) AS ?no) { ?s ?p ?o }"
        qres = self.graph.query(query)
        return int(qres.result[0][0])

    def get_classes(self):
        query = "SELECT DISTINCT ?class WHERE { [] a ?class }"
        qres = self.graph.query(query)
        return qres.result

    def get_classes_count(self):
        query = "SELECT COUNT(distinct ?o) AS ?no { ?s rdf:type ?o }"
        qres = self.graph.query(query)
        return int(qres.result[0][0])

    def get_properties(self):
        query = "SELECT DISTINCT ?p WHERE { ?s ?p ?o }"
        qres = self.graph.query(query)
        return qres.result

    def get_properties_count(self):
        query = "SELECT COUNT(distinct ?p) AS ?no WHERE { ?s ?p ?o }"
        qres = self.graph.query(query)
        return int(qres.result[0][0])

    def get_subjects(self):
        query = "SELECT DISTINCT ?s WHERE { ?s ?p ?o }"
        qres = self.graph.query(query)
        return qres.result

    def get_subjects_count(self):
        query = "SELECT COUNT(distinct ?s) WHERE { ?s ?p ?o }"
        qres = self.graph.query(query)
        return int(qres.result[0][0])

    # NOTE(review): duplicate definition — this shadows the earlier
    # get_properties_count above and counts distinct *subjects*, not
    # properties. Looks like a copy-paste error; confirm which is intended.
    def get_properties_count(self):
        query = "SELECT COUNT(distinct ?s) AS ?no WHERE { ?s ?p ?o }"
        qres = self.graph.query(query)
        return int(qres.result[0][0])

    def get_objects(self):
        query = "SELECT DISTINCT ?o WHERE { ?s ?p ?o }"
        qres = self.graph.query(query)
        return qres.result

    def get_objects_count(self):
        query = "SELECT COUNT(distinct ?o) AS ?no WHERE { ?s ?p ?o }"
        qres = self.graph.query(query)
        return int(qres.result[0][0])

    def get_class_instances(self, class_name):
        query = "SELECT DISTINCT ?s WHERE { ?s a <" + class_name + "> }"
        qres = self.graph.query(query)
        return qres.result

    def get_class_instances_count(self, class_name):
        query = "SELECT COUNT(distinct ?s) AS ?no WHERE { ?s a <" + class_name + "> }"
        qres = self.graph.query(query)
        return int(qres.result[0][0])

    def get_all_classes_instances(self):
        # class URI -> number of instances.
        instances = {}
        for c in self.get_classes():
            clazz = str(c[0].encode("utf-8"))
            instances[clazz] = self.get_class_instances_count(clazz)
        return instances

    def get_all_predicate_triples(self):
        # predicate URI -> number of triples using it.
        predicates = {}
        for p in self.get_properties():
            predicate = str(p[0].encode("utf-8"))
            predicates[predicate] = self.get_property_count(predicate)
        return predicates

    def get_property(self, property_name):
        query = "SELECT * WHERE { ?s <" + property_name + "> ?o }"
        qres = self.graph.query(query)
        return qres.result

    def get_property_count(self, property_name):
        query = "SELECT (COUNT(*) AS ?no) WHERE { ?s <" + property_name + "> ?o }"
        qres = self.graph.query(query)
        return int(qres.result[0][0])

    def get_entities(self):
        # Typed, non-blank subjects whose URI starts with the dataset prefix.
        query = (
            'SELECT DISTINCT ?s WHERE { ?s a [] . FILTER ((!isBlank(?s)) && regex(str(?s), "^'
            + self.uri_pattern + '"))}'
        )
        qres = self.graph.query(query)
        return qres.result

    def get_entities_count(self):
        query = (
            'SELECT COUNT(distinct ?s) AS ?no WHERE { ?s a [] . FILTER ((!isBlank(?s)) && regex(str(?s), "^'
            + self.uri_pattern + '"))}'
        )
        qres = self.graph.query(query)
        return int(qres.result[0][0])

    def get_all_links(self):
        # IRI-to-IRI triples, excluding typing predicates.
        query = """SELECT * WHERE { ?s ?p ?o .
 FILTER (!isBlank(?s) && !isBlank(?o) && isIRI(?s) && isIRI(?o) &&
 (str(?p) != "http://www.w3.org/1999/02/22-rdf-syntax-ns#type") &&
 (str(?p) != "http://purl.org/dc/elements/1.1/type"))}"""
        qres = self.graph.query(query)
        return qres.result

    def get_all_links_count(self):
        query = """SELECT (COUNT(*) AS ?no) WHERE { ?s ?p ?o .
 FILTER (!isBlank(?s) && !isBlank(?o) && isIRI(?s) && isIRI(?o) &&
 (str(?p) != "http://www.w3.org/1999/02/22-rdf-syntax-ns#type") &&
 (str(?p) != "http://purl.org/dc/elements/1.1/type"))}"""
        qres = self.graph.query(query)
        return int(qres.result[0][0])

    def get_ingoing_links(self):
        # External subject -> internal object (links pointing into the dataset).
        query = (
            '''SELECT * WHERE { ?s ?p ?o .
 FILTER (!isBlank(?s) && !isBlank(?o) && !regex(str(?s), "''' + self.uri_pattern
            + '''") && isIRI(?s) && regex(str(?o), "''' + self.uri_pattern
            + """") && isIRI(?o) &&
 (str(?p) != "http://www.w3.org/1999/02/22-rdf-syntax-ns#type") &&
 (str(?p) != "http://purl.org/dc/elements/1.1/type"))}"""
        )
        qres = self.graph.query(query)
        return qres.result

    def get_ingoing_links_count(self):
        query = (
            '''SELECT (COUNT(*) AS ?no) WHERE { ?s ?p ?o .
 FILTER (!isBlank(?s) && !isBlank(?o) && !regex(str(?s), "''' + self.uri_pattern
            + '''") && isIRI(?s) && regex(str(?o), "''' + self.uri_pattern
            + """") && isIRI(?o) &&
 (str(?p) != "http://www.w3.org/1999/02/22-rdf-syntax-ns#type") &&
 (str(?p) != "http://purl.org/dc/elements/1.1/type"))}"""
        )
        qres = self.graph.query(query)
        return int(qres.result[0][0])

    def get_outgoing_links(self):
        # Internal subject -> external object (links leaving the dataset).
        query = (
            '''SELECT * WHERE { ?s ?p ?o .
 FILTER (!isBlank(?s) && !isBlank(?o) && regex(str(?s), "''' + self.uri_pattern
            + '''") && isIRI(?s) && !regex(str(?o), "''' + self.uri_pattern
            + """") && isIRI(?o) &&
 (str(?p) != "http://www.w3.org/1999/02/22-rdf-syntax-ns#type") &&
 (str(?p) != "http://purl.org/dc/elements/1.1/type"))}"""
        )
        qres = self.graph.query(query)
        return qres.result

    def get_outgoing_links_count(self):
        query = (
            '''SELECT (COUNT(*) AS ?no) WHERE { ?s ?p ?o .
 FILTER (!isBlank(?s) && !isBlank(?o) && regex(str(?s), "''' + self.uri_pattern
            + '''") && isIRI(?s) && !regex(str(?o), "''' + self.uri_pattern
            + """") && isIRI(?o) &&
 (str(?p) != "http://www.w3.org/1999/02/22-rdf-syntax-ns#type") &&
 (str(?p) != "http://purl.org/dc/elements/1.1/type"))}"""
        )
        qres = self.graph.query(query)
        return int(qres.result[0][0])

    def get_inner_links(self):
        # Internal subject -> internal object (links within the dataset).
        query = (
            '''SELECT * WHERE { ?s ?p ?o .
 FILTER (!isBlank(?s) && !isBlank(?o) && regex(str(?s), "''' + self.uri_pattern
            + '''") && isIRI(?s) && regex(str(?o), "''' + self.uri_pattern
            + """") && isIRI(?o) &&
 (str(?p) != "http://www.w3.org/1999/02/22-rdf-syntax-ns#type") &&
 (str(?p) != "http://purl.org/dc/elements/1.1/type"))}"""
        )
        qres = self.graph.query(query)
        return qres.result

    def get_inner_links_count(self):
        query = (
            '''SELECT (COUNT(*) AS ?no) WHERE { ?s ?p ?o .
 FILTER (!isBlank(?s) && !isBlank(?o) && regex(str(?s), "''' + self.uri_pattern
            + '''") && isIRI(?s) && regex(str(?o), "''' + self.uri_pattern
            + """") && isIRI(?o) &&
 (str(?p) != "http://www.w3.org/1999/02/22-rdf-syntax-ns#type") &&
 (str(?p) != "http://purl.org/dc/elements/1.1/type"))}"""
        )
        qres = self.graph.query(query)
        return int(qres.result[0][0])

    def get_vocabularies(self):
        # Namespace prefixes of all predicates in the dataset.
        property_list = [str(p[0].encode("utf-8")) for p in self.get_properties()]
        return self.get_patterns(property_list)

    def get_uri_pattern(self):
        # Dominant URI prefix of the dataset's subjects.
        subjects = self.get_subjects()
        subject_list = []
        for subject in subjects:
            subject_list.append(str(subject[0].encode("utf-8")))
        return self.get_pattern(subject_list)

    def get_pattern(self, collection):
        processes = 10
        # Only http:// URIs participate in pattern detection.
        collection = [e for e in collection if e.find("http://") == 0]
        result = namespace_finder.find_pattern(collection, branches=processes,
                                               subprocesses=False, verbose=False)
        return result

    def get_patterns(self, uri_list):
        # Greedily peel off one namespace prefix (up to last '#' or '/') at a
        # time until every URI is covered.
        temp_list = []
        temp_list += uri_list
        patterns = []
        while len(temp_list) > 0:
            pos = temp_list[0].rfind("#")
            if pos == -1:
                pos = temp_list[0].rfind("/")
            if pos > -1:
                pattern = temp_list[0][:pos]
                patterns.append(pattern)
                temp_list = [e for e in temp_list if not e.startswith(pattern)]
        return patterns

    def map_subprocess(self, data):
        if self.subprocess:
            # NOTE(review): `branches` is not defined in this scope — this
            # path raises NameError as written; confirm the intended source
            # of the pool size.
            pool = Pool(branches)
            result = pool.map(check_for_semantic, data)
            pool.close()
            pool.terminate()
            return result
        else:
            return map(check_for_semantic, data)

    def get_linksets(self, branches=5):
        # Group outgoing links into candidate target datasets and probe each
        # one (possibly in parallel) for semantic-web capability.
        temp_links = self.get_outgoing_links()
        empty = False
        out_datasets = []
        outgoing_links = []
        val = URLValidator(verify_exists=False)
        for obj in temp_links:
            # Keep only syntactically valid target URLs.
            try:
                val(str(obj[0].encode("utf-8")))
                outgoing_links.append(str(obj[0].encode("utf-8")))
            except:
                pass
        while not empty:
            # Peel off the most common URI prefix and drop its members.
            out_pattern = self.get_pattern(outgoing_links)
            outgoing_links = [
                e for e in outgoing_links
                if (e.find(out_pattern[1]) != 0) and ((e + "/").find(out_pattern[1]) != 0)
            ]
            out_datasets.append(out_pattern[1])
            if len(outgoing_links) == 0:
                empty = True
        if len(out_datasets) < branches:
            branches = len(out_datasets)
        # print len(self.graph)
        # print self.graph
        result = self.map_subprocess(
            zip(out_datasets, repeat(self.uri_pattern), repeat(self.identifier),
                repeat(self.configstring))
        )
        # print result
        linksets = {}
        for item in result:
            # NOTE(review): eval() on worker output — only safe if
            # check_for_semantic's results are fully trusted; confirm.
            temp_dict = eval(str(item))
            for key in temp_dict.keys():
                linksets[key] = temp_dict[key]
        return linksets
class ProxyStore(Store):
    """
    A Proxy store implementation.

    The real store lives on a server accessed with a REST protocol; this
    class caches it locally in an rdflib Graph and synchronises through
    HTTP GET (:meth:`_pull`) and HTTP PUT (:meth:`_push`).

    :param configuration: Can be a string or a dictionary. May be passed
        to __init__() or to open(). Specified as a configuration string
        (store database connection string). For KTBS, it is preferably a
        dictionary which may contain credentials for HTTP requests, the
        URI of the graph and an httpresponse supplied by the client
        (contains an RDF serialized graph already posted with HTTPLIB2
        and the header of the response). If the parameters are in a
        string, the format should be "key1:value1;key2:value2".
        Optional.

    :param identifier: URIRef identifying the graph to cache in the store.

    See http://www.rdflib.net/store/ for the detail of a store.
    Take store.py for the skeleton.
    """

    # Already defined in the Store class; restated here for clarity.
    context_aware = False
    formula_aware = False
    transaction_aware = False

    def __init__(self, configuration=None, identifier=None):
        """ProxyStore initialization.

        Creates an empty Graph and initializes the HTTP client (uses the
        default for internal graph storage, i.e. IOMemory).

        The URIRef of the graph must be supplied either in `identifier`
        or in the `configuration` parameter; it will be checked by open().

        :param configuration: See class docstring. Optional.
        :param identifier: URIRef identifying the graph to cache.
        """
        LOG.debug("-- ProxyStore.init(configuration=%s, identifer=%s) --\n",
                  configuration, identifier)

        self._identifier = identifier
        self._format = None
        self._etags = None
        self._req_headers = {}

        self.configuration = None
        configuration = self._configuration_extraction(configuration)

        self._graph = Graph()

        # Most important parameter: identifier and graph address.
        # If not given, we can not go further.
        if (identifier is not None) and len(identifier) > 0:
            if len(configuration) == 0:
                configuration = {PS_CONFIG_URI: identifier}

        # Show the network activity when debugging is requested.
        if PS_CONFIG_DEBUG_HTTP in configuration.keys():
            httplib2.debuglevel = 1

        # Use the provided Http connection if any; otherwise create one.
        http_cx = configuration.get(PS_CONFIG_HTTP_CX)
        if http_cx is None:
            http_cx = httplib2.Http()
        else:
            assert isinstance(http_cx, httplib2.Http)
        self.httpserver = http_cx

        # Store.__init__ will call open() if configuration is not None.
        Store.__init__(self, configuration)

    @property
    def prefered_format(self):
        """The format that the remote server seems to prefer.

        :returns: a tuple (content_type, rdflib_format); defaults to
            turtle when no format has been negotiated yet.
        """
        return _CONTENT_TYPE_SERIALIZERS.get(self._format, "text/turtle"), \
               (self._format or "turtle")

    def open(self, configuration, create=False):
        """Opens the store specified by the configuration string.

        For the ProxyStore, the identifier is the graph address.

        :param configuration: Usually a configuration string of the store
            (for database connection). May contain credentials for HTTP
            requests. Can be a string or a dictionary.
        :param create: True to create a store. Not meaningful for the
            ProxyStore. Optional.

        :returns: * VALID_STORE on success
                  * UNKNOWN No identifier or wrong identifier
                  * NO_STORE
        :raises StoreIdentifierError: if no identifier can be determined,
            or if the supplied identifier conflicts with the configuration.
        """
        LOG.debug("-- ProxyStore.open(configuration=%s, create=%s), "
                  "identifier: %s --\n",
                  configuration, create, self._identifier)

        self.configuration = self._configuration_extraction(configuration)

        if (self._identifier is None) or len(self._identifier) == 0:
            # No identifier given at __init__: it must be in configuration.
            if PS_CONFIG_URI in self.configuration.keys():
                self._identifier = self.configuration[PS_CONFIG_URI]
            else:
                raise StoreIdentifierError(identifier=self._identifier)
        else:
            # Identifier given twice: both values must agree.
            if (PS_CONFIG_URI in self.configuration.keys()) and \
               (self._identifier != self.configuration[PS_CONFIG_URI]):
                raise StoreIdentifierError(identifier=self._identifier)

        if PS_CONFIG_HTTP_RESPONSE in self.configuration.keys():
            # Serialized graph already sent by the client to the server.
            # Populate the graph with the server response, no need to pull
            # the data from the server again.
            if len(self.configuration[PS_CONFIG_HTTP_RESPONSE]) == 2:
                self._parse_header(
                    self.configuration[PS_CONFIG_HTTP_RESPONSE][0])
                self._parse_content(
                    self.configuration[PS_CONFIG_HTTP_RESPONSE][1])

        return VALID_STORE

    @staticmethod
    def _configuration_extraction(configuration):
        """Extract configuration data passed to ProxyStore.

        What do we do if configuration is passed twice (once in __init__
        and again in open)? For the moment, overwrite.

        For the moment, invalid configuration parameters are ignored (no
        StoreInvalidConfigurationError exception).

        :param configuration: Configuration string or dictionary (see
            class docstring). Optional.

        :returns: A dictionary with the extracted configuration.
        """
        extracted_configuration = {}

        # BUG FIX: this guard referenced `self` inside a @staticmethod,
        # which would raise NameError; it matches the upstream TODO and
        # is kept only as a note:
        # TODO LATER ? if self.configuration is not None:

        if isinstance(configuration, types.DictType):
            extracted_configuration = configuration
        elif isinstance(configuration, types.StringTypes):
            if len(configuration) > 0:
                # Expect to get a key1:value1;key2:value2;.... string.
                # If not formatted like this, nothing should be extracted.
                for item in configuration.split(";"):
                    elems = item.split(":")
                    if len(elems) == 2:
                        extracted_configuration[elems[0]] = elems[1]

        return extracted_configuration

    def _parse_header(self, header):
        """Parses the header of the HTTP request or response.

        Deduces the serialization format from the Content-Type header and
        keeps the entity tag for later conditional requests.

        :param header: Header of the HTTP request or response.
        """
        ctype = header.get("content-type", "text/turtle").split(";", 1)[0]
        self._format = _CONTENT_TYPE_PARSERS[ctype]
        LOG.debug("-- ProxyStore._parse_header(), "
                  "content-type=%s, self._format=%s --",
                  ctype, self._format)
        self._etags = header.get('etag')

    def _parse_content(self, content):
        """Parses `content` to (re)build the cached graph.

        :param content: HTTP received data either got by ProxyStore or
            passed by RDFREST Client.
        """
        LOG.debug("-- ProxyStore._parse_content() using %s format",
                  self._format)
        parse_format = self._format
        if parse_format == "nt":
            parse_format = "n3"  # seems to be more efficient!...
        self.remove((None, None, None), None)  # efficiently empties graph
        # the above is much faster than remove((None, None, None))
        # BUG FIX: pass the adjusted parse_format; previously the computed
        # value was a dead store and self._format was passed instead.
        self._graph.parse(StringIO(content), format=parse_format,
                          publicID=self._identifier)

    def _pull(self):
        """Update the cache before an operation.

        This method must be called before each get-type request.

        :raises ResourceAccessError: on HTTP 404 from the server.
        """
        LOG.debug("-- _pull ... start ...")
        assert self._identifier is not None, "The store must be open."

        # TODO SOON - If there is a problem to get the graph
        # (wrong address...) set an indication to notify it.
        req_headers = {
            "accept": ACCEPT,
        }
        # One-shot headers queued by force_refresh() are consumed here.
        req_headers.update(self._req_headers)
        self._req_headers.clear()

        header, content = self.httpserver.request(self._identifier,
                                                  headers=req_headers)
        LOG.debug("[received header]\n%s", header)

        # TODO SOON Refine, test and define use-cases
        # httplib2 raises a httplib2.ServerNotFoundError exception when ...
        # Throw a ResourceAccessError exception in case of HTTP 404 as we
        # have no better mean at the moment.
        if header.status == httplib.NOT_FOUND:
            raise ResourceAccessError(header.status, self._identifier,
                                      self.configuration)

        # Re-parse only if the response did not come from httplib2's cache
        # or if we have never parsed anything yet.
        if not header.fromcache or self._format is None:
            LOG.debug("[received content]\n%s", content)
            if self._format is None:
                LOG.debug("Creating proxy graph ....")
            else:
                LOG.debug("Updating proxy graph ....")
            self._parse_header(header)
            self._parse_content(content)
        else:
            LOG.debug("Proxy graph is up to date ...")

        LOG.debug("-- _pull() ... stop ...")

    def force_refresh(self, clear_cache=False):
        """Forces the cache to be updated with HTTP specific headers.

        If `clear_cache` is False (default), etags will still be used, so
        the server may reply with a 304 Not Changed. If `clear_cache` is
        True, the cache will be cleared, so the content will have to be
        resent by the server.
        """
        LOG.debug("-- force_refresh called ()")
        if clear_cache:
            self._req_headers = {
                "Cache-Control": "no-cache",
            }
        else:
            self._req_headers = {
                "Cache-Control": "max-age=0",
            }

    def _push(self):
        """Send data to the server.

        Applies the modifications of the cache, triggers an exception if
        data has already been modified on the server (via If-Match/ETags).

        :raises GraphChangedError: on HTTP 412 Precondition Failed.
        :raises ServerError: on any HTTP 5xx status.
        :raises RuntimeError: on any other non-OK status.
        """
        LOG.debug("-- _push() ... start ... --")
        assert self._identifier is not None, "The store must be open."

        # TODO SOON : How to build the "PUT" request ?
        # Which data in the header? Which serialization? The same as we
        # received, but does rdflib supply all kinds of parsing /
        # serialization?
        headers = {
            'Content-Type': '%s; charset=UTF-8'
                            % _CONTENT_TYPE_SERIALIZERS[self._format],
            'Accept': ACCEPT,
        }
        if self._etags:
            # Conditional PUT: fail if the server-side graph has changed.
            headers['If-Match'] = self._etags
        data = self._graph.serialize(format=self._format)

        LOG.debug("[sent headers]\n%s", headers)
        LOG.debug("[sent data]\n%s", data)

        # TODO SOON : Analyze the server response
        # The server will tell if the graph has changed
        # The server will supply new ETags ... update the data with the
        # response.
        rheader, rcontent = self.httpserver.request(self._identifier,
                                                    'PUT',
                                                    data,
                                                    headers=headers)

        LOG.debug("[response header]\n%s", rheader)
        LOG.debug("[response content]\n%s", rcontent)

        if rheader.status in (httplib.OK,):
            self._parse_header(rheader)
        elif rheader.status in (httplib.PRECONDITION_FAILED,):
            raise GraphChangedError(url=self._identifier,
                                    msg=rheader.status)
        elif str(rheader.status)[0] == "5":
            raise ServerError(url=self._identifier, msg=rheader.status)
        else:
            raise RuntimeError(
                "%s: %s %s\n%s" % (self._identifier,
                                   rheader.status,
                                   rheader.reason,
                                   rcontent))

        LOG.debug("-- _push() ... stop ... --")

    def add(self, triple, context=None, quoted=False):
        """Add a triple to the store (to the local cache only; _push()
        must be called to propagate the change to the server).

        :param triple: Triple (subject, predicate, object) to add.
        :param context: Ignored (the store is not context-aware).
        :param quoted: The quoted argument is interpreted by formula-aware
            stores to indicate this statement is quoted/hypothetical.
            Must be False here since the store is not formula-aware.
        """
        LOG.debug("-- ProxyStore.add(triple=%s, context=%s, quoted=%s) --",
                  triple, context, quoted)

        assert self._identifier is not None, "The store must be open."
        # TODO LATER : Wrong, assert is made to test bugs
        assert self._format is not None, "The store must be open."
        assert not quoted, "The store -proxyStore- is not formula-aware"

        Store.add(self, triple, context, quoted)
        self._graph.add(triple)

    def remove(self, triple, context):
        """Remove the set of triples matching the pattern from the store.

        :param triple: Triple (subject, predicate, object) to remove.
        :param context: Ignored (the store is not context-aware).
        """
        # pylint: disable-msg=W0222
        # Signature differs from overridden method
        LOG.debug("-- ProxyStore.remove(triple=%s, context=%s) --",
                  triple, context)

        Store.remove(self, triple, context)

        if triple == (None, None, None):
            # The default implementation of Graph is not efficient at
            # clearing, so better create a new empty one.
            self._graph = Graph()
        else:
            self._graph.store.remove(triple)

    def triples(self, triple, context=None):
        """Returns an iterator over all the triples matching the pattern.

        :param triple: Triple (subject, predicate, object) pattern.
        :param context: ProxyStore is not context-aware, but its internal
            IOMemory cache store is. Avoid the context parameter.

        :returns: An iterator over the triples.
        """
        LOG.debug("-- ProxyStore.triples(triple=%s, context=%s) --",
                  triple, context)

        Store.triples(self, triple)  # , context=None)

        self._pull()

        return self._graph.store.triples(triple)  # , context=None)

    def __len__(self, context=None):
        """Number of statements in the store (after refreshing the cache).

        :returns: The number of statements in the store.
        """
        self._pull()
        ret = len(self._graph)
        LOG.debug("******** __len__ : ProxyStore, nb statements %d", ret)
        return ret

    # ---------- Formula / Context Interfaces ----------
    # def contexts(self, triple=None):
    # Generator over all contexts in the graph. If triple is specified, a
    # generator over all contexts the triple is in.
    # def remove_context(self, identifier)
    # ---------- Formula / Context Interfaces ----------

    # ---------- Optional Transactional methods ----------
    def commit(self):
        """Sends the modifications to the server."""
        self._push()

    def rollback(self):
        """Cancel the modifications. Get the graph from the server."""
        self._pull()
    # ---------- Optional Transactional methods ----------

    def close(self, commit_pending_transaction=False):
        """This closes the database connection.

        :param commit_pending_transaction: Specifies whether to commit all
            pending transactions before closing (if the store is
            transactional). Currently ignored.
        """
        LOG.debug("******** close (%s) ", commit_pending_transaction)

        self._identifier = None
        self._etags = None
        self.configuration = None
        self._format = None

        self._graph.close()

        self.httpserver.clear_credentials()

    def destroy(self, configuration):
        """This destroys the instance of the store identified by the
        configuration string. No-op for a proxy: the remote resource is
        not deleted.

        :param configuration: Configuration string identifying the store.
        """
        LOG.debug("******** destroy (%s) ", configuration)

    def query(self, query, initNs=None, initBindings=None, queryGraph=None,
              **kw):
        """I provide SPARQL query processing as a store.

        I simply pass through the query to the underlying graph. This
        prevents an external SPARQL engine from making multiple accesses
        to that store, which could generate HTTP traffic.
        """
        # initNs and initBindings are invalid names for pylint (C0103),
        # but method `query` is specified by rdflib, so
        # pylint: disable=C0103
        if initNs is None:
            initNs = {}
        if initBindings is None:
            initBindings = {}
        self._pull()
        return self._graph.query(query, initNs=initNs,
                                 initBindings=initBindings, **kw)
class ProxyStore(Store):
    """
    A Proxy store implemention.

    :param configuration: Can be a string or a dictionary. May be passed
        to __init__() or to open(). Specified as a configuration string
        (store database connection string). For KTBS, it is preferably a
        dictionary which may contain credentials for HTTP requests, the
        URI of the graph and an httpresponse supplied by the client
        (contains an RDF serialized graph already posted with HTTPLIB2
        and the header of the response). If the parameters are in a
        string, the format should be "key1:value1;key2:value2". May be
        passed to __init__() or to open(). Optionnal.

    :param identifier: URIRef identifying the graph to cache in the store.

    See http://www.rdflib.net/store/ for the detail of a store.
    Take store.py for the squeletton.

    The real store is on a server accessed with a REST protocol.
    """
    # NOTE(review): this is the second ProxyStore definition in this file;
    # it shadows the earlier one at import time. This variant always uses
    # an httplib2 file cache and supports credentials in open().

    # Already define in the Store class
    context_aware = False
    formula_aware = False
    transaction_aware = False

    def __init__(self, configuration=None, identifier=None):
        """ProxyStore initialization.

        Creates an empty Graph, intializes the HTTP client.
        Use the defaut for internal graph storage, i.e IOMemory.
        The URIref of the graph must be supplied either in identifier or
        in configuration parameter. It will be checked by open().
        The cache file path could be given in the configuration dictionary
        (__init__ only). We have to search about the memory cache.
        """
        LOG.debug("-- ProxyStore.init(configuration=%s, identifer=%s) --\n",
                  configuration, identifier)

        self._identifier = identifier
        self._format = None      # rdflib parser format, set by _parse_header()
        self._etags = None       # last ETag seen, used for conditional PUT
        self._req_headers = {}   # one-shot headers queued by force_refresh()

        self.configuration = None
        configuration = self._configuration_extraction(configuration)

        self._graph = Graph()

        # Most important parameter : identifier and graph address
        # If not given, we can not go further
        if (identifier is not None) and len(identifier) > 0:
            if len(configuration) == 0:
                configuration = {PS_CONFIG_URI: identifier}

        # Show the network activity
        if PS_CONFIG_DEBUG_HTTP in configuration.keys():
            httplib2.debuglevel = 1

        # File path for HTTPLIB2 cache
        # As it is a file cache, it is conserved between two executions
        # Should we delete the directory on application end (i.e close()) ?
        if PS_CONFIG_HTTP_CACHE in configuration.keys():
            self.httpserver = httplib2.Http(
                configuration[PS_CONFIG_HTTP_CACHE])
        else:
            self.httpserver = httplib2.Http(CACHE_DIR)

        # Store will call open() if configuration is not None
        Store.__init__(self, configuration)

    @property
    def prefered_format(self):
        """The format that the remote server seems to prefer.

        Return a tuple (content_type, rdflib_format)
        """
        return _CONTENT_TYPE_SERIALIZERS.get(self._format, "text/turtle"), \
               (self._format or "turtle")

    def open(self, configuration, create=False):
        """Opens the store specified by the configuration string.

        For the ProxyStore, the identifier is the graph address.

        :param configuration: Usually a configuration string of the store
            (for database connection). May contain credentials for HTTP
            requests. Can be a string or a dictionary. May be passed to
            __init__() or to open().
        :param create: True to create a store. This not meaningfull for
            the ProxyStore. Optionnal.

        :returns: * VALID_STORE on success
                  * UNKNOWN No identifier or wrong identifier
                  * NO_STORE
        """
        LOG.debug("-- ProxyStore.open(configuration=%s, create=%s), "
                  "identifier: %s --\n",
                  configuration, create, self._identifier)

        self.configuration = self._configuration_extraction(configuration)

        # The identifier must be supplied at least once, and consistently.
        if (self._identifier is None) or len(self._identifier) == 0:
            if PS_CONFIG_URI in self.configuration.keys():
                self._identifier = self.configuration[PS_CONFIG_URI]
            else:
                raise StoreIdentifierError(identifier=self._identifier)
        else:
            if (PS_CONFIG_URI in self.configuration.keys()) and \
               (self._identifier != self.configuration[PS_CONFIG_URI]):
                raise StoreIdentifierError(identifier=self._identifier)

        # Register HTTP credentials on the shared httplib2 client, if given.
        if PS_CONFIG_USER in self.configuration.keys() and \
           PS_CONFIG_PWD in self.configuration.keys():
            self.httpserver.add_credentials(self.configuration[PS_CONFIG_USER],
                                            self.configuration[PS_CONFIG_PWD])

        if PS_CONFIG_HTTP_RESPONSE in self.configuration.keys():
            # Serialized graph already sent by the client to the server
            # Populated the graph with the server response, no need to pull
            # the data from the server again
            if len(self.configuration[PS_CONFIG_HTTP_RESPONSE]) == 2:
                self._parse_header(
                    self.configuration[PS_CONFIG_HTTP_RESPONSE][0])
                self._parse_content(
                    self.configuration[PS_CONFIG_HTTP_RESPONSE][1])

        return VALID_STORE

    @staticmethod
    def _configuration_extraction(configuration):
        """Extract configuration data passed to ProxyStore.

        What do we do if configuration is passed twice (once in __init__
        and again in open) ? For the moment, overwrite.

        For the moment, ignore invalid configuration parameters (no
        StoreInvalidConfigurationError exception).

        :param configuration: Usually a configuration string of the store
            (for database connection). May contain credentials for HTTP
            requests. Can be a string or a dictionary. May be passed to
            __init__() or to open(). Optionnal.

        :returns: A dictionnary with the extracted configuration.
        """
        extracted_configuration = {}

        # NOTE(review): in upstream KTBS the following guard is part of the
        # TODO comment; as live code it would raise NameError (`self` is not
        # available inside a @staticmethod) -- confirm against the original:
        # TODO LATER ? if self.configuration is not None:

        if isinstance(configuration, types.DictType):
            extracted_configuration = configuration
        elif isinstance(configuration, types.StringTypes):
            if len(configuration) > 0:
                # Expect to get a key1:value1;key2:value2;.... string
                # If not formatted like this, nothing should be extracted
                for item in configuration.split(";"):
                    elems = item.split(":")
                    if len(elems) == 2:
                        extracted_configuration[elems[0]] = elems[1]

        return extracted_configuration

    def _parse_header(self, header):
        """Parses the header of the HTTP request or response.

        TODO LATER Analyse Content-Type HTTP header to determine
        the serialization used
        TODO LATER The serialization must be stored

        :param header: Header of the HTTP request or response.
        """
        # Strip any parameters (e.g. "; charset=...") from the media type.
        ctype = header.get("content-type", "text/turtle").split(";", 1)[0]
        self._format = _CONTENT_TYPE_PARSERS[ctype]
        LOG.debug("-- ProxyStore._parse_header(), "
                  "content-type=%s, self._format=%s --",
                  ctype, self._format)
        self._etags = header.get('etag')

    def _parse_content(self, content):
        """Parses the data in the content parameter to build the graph to
        cache.

        :param content: HTTP received data either got by ProxyStore or
            passed by RDFREST Client.
        """
        # Creates the graph
        LOG.debug("-- ProxyStore._parse_content() using %s format",
                  self._format)
        parse_format = self._format
        if parse_format == "nt":
            parse_format = "n3"  # seems to be more efficient!...
        self.remove((None, None, None), None)  # efficiently empties graph
        # the above is much faster than remove((None, None, None))
        # NOTE(review): parse_format is computed above but never used --
        # the call below passes self._format, so the nt->n3 remap is dead;
        # likely should be format=parse_format.
        self._graph.parse(StringIO(content), format=self._format,
                          publicID=self._identifier)

    def _pull(self):
        """Update cache before an operation.

        This method must be called before each get-type request.
        """
        LOG.debug("-- _pull ... start ...")
        assert self._identifier is not None, "The store must be open."

        # TODO SOON - If there is a problem to get the graph
        # (wrong address...) set an indication to notify it
        req_headers = {
            "accept": ACCEPT,
        }
        # Consume any one-shot headers queued by force_refresh().
        req_headers.update(self._req_headers)
        self._req_headers.clear()

        header, content = self.httpserver.request(self._identifier,
                                                  headers=req_headers)
        LOG.debug("[received header]\n%s", header)

        # TODO SOON Refine, test and define use-cases
        # httplib2 raises a httplib2.ServerNotFoundError exception when ...
        # Throw a ResourceAccessError exception in case of HTTP 404 as we
        # have no better mean at the moment
        if header.status == httplib.NOT_FOUND:
            raise ResourceAccessError(header.status, self._identifier,
                                      self.configuration)

        # Only re-parse when the response did not come from httplib2's
        # cache, or when nothing has been parsed yet.
        if not header.fromcache or self._format is None:
            LOG.debug("[received content]\n%s", content)
            if self._format is None:
                LOG.debug("Creating proxy graph ....")
            else:
                LOG.debug("Updating proxy graph ....")
            self._parse_header(header)
            self._parse_content(content)
        else:
            LOG.debug("Proxy graph is up to date ...")

        LOG.debug("-- _pull() ... stop ...")

    def force_refresh(self, clear_cache=False):
        """Forces the cache to be updated with HTTP specific headers.

        If `clear_cache` is False (default), etags will still be used, so
        the server may reply with a 304 Not Changed. If `clear_cache` is
        True, the cache will be cleared, so the content will have to be
        resent by the server.
        """
        LOG.debug("-- force_refresh called ()")

        if clear_cache:
            self._req_headers = {
                "Cache-Control" : "no-cache",
            }
        else:
            self._req_headers = {
                "Cache-Control" : "max-age=0",
            }

    def _push(self):
        """Send data to server.

        Apply the modifications on the cache, trigger an exception if data
        has already been modified on the server.
        """
        LOG.debug("-- _push() ... start ... --")
        assert self._identifier is not None, "The store must be open."

        # TODO SOON : How to build the "PUT" request ?
        # Which data in the header ?
        # Which serialization ? The same as we received but does rdflib
        # supply all kind of parsing / serialization ?
        headers = {'Content-Type': '%s; charset=UTF-8'
                   % _CONTENT_TYPE_SERIALIZERS[self._format],
                   'Accept': ACCEPT,
                   }
        if self._etags:
            # Conditional PUT: the server rejects the write (412) if the
            # remote graph changed since we last pulled it.
            headers['If-Match'] = self._etags
        data = self._graph.serialize(format=self._format)

        LOG.debug("[sent headers]\n%s", headers)
        LOG.debug("[sent data]\n%s", data)

        # TODO SOON : Analyze the server response
        # The server will tell if the graph has changed
        # The server will supply new ETags ... update the data with the
        # response
        rheader, rcontent = self.httpserver.request(self._identifier,
                                                    'PUT',
                                                    data,
                                                    headers=headers)

        LOG.debug("[response header]\n%s", rheader)
        LOG.debug("[response content]\n%s", rcontent)

        if rheader.status in (httplib.OK,):
            self._parse_header(rheader)
        elif rheader.status in (httplib.PRECONDITION_FAILED,):
            raise GraphChangedError(url=self._identifier,
                                    msg=rheader.status)
        elif str(rheader.status)[0] == "5":
            raise ServerError(url=self._identifier, msg=rheader.status)
        else:
            raise RuntimeError("%s: %s %s\n%s" % (self._identifier,
                                                  rheader.status,
                                                  rheader.reason,
                                                  rcontent))

        LOG.debug("-- _push() ... stop ... --")

    def add(self, triple, context=None, quoted=False):
        """Add a triple to the store.

        Apply the modifications on the cache, trigger an exception if data
        has already been modified on the server.

        :param triple: Triple (subject, predicate, object) to add.
        :param context:
        :param quoted: The quoted argument is interpreted by formula-aware
            stores to indicate this statement is quoted/hypothetical. It
            should be an error to not specify a context and have the
            quoted argument be True. It should also be an error for the
            quoted argument to be True when the store is not
            formula-aware.

        :returns:
        """
        LOG.debug("-- ProxyStore.add(triple=%s, context=%s, quoted=%s) --",
                  triple, context, quoted)

        assert self._identifier is not None, "The store must be open."
        # TODO LATER : Wrong, assert is made to test bugs
        assert self._format is not None, "The store must be open."
        assert quoted == False, "The store -proxyStore- is not formula-aware"

        Store.add(self, triple, context, quoted)

        # Instruction suivant extraite du plugin Sleepycat
        # Store.add(self, (subject, predicate, object), context, quoted)
        self._graph.add(triple)

    def remove(self, triple, context):
        """Remove the set of triples matching the pattern from the store

        :param triple: Triple (subject, predicate, object) to remove.
        :param context:

        :returns:
        """
        # pylint: disable-msg=W0222
        # Signature differs from overriden method
        LOG.debug("-- ProxyStore.remove(triple=%s, context=%s) --",
                  triple, context)

        Store.remove(self, triple, context)

        if triple == (None, None, None):
            self._graph = Graph()
            # the default implementation of Graph is not efficient in doing
            # this, so better create a new empty one
        else:
            self._graph.store.remove(triple)

    def triples(self, triple, context=None):
        """Returns an iterator over all the triples (within the
        conjunctive graph or just the given context) matching the given
        pattern.

        :param triple: Triple (subject, predicate, object) to remove.
        :param context: ProxyStore is not context aware but it's internal
            cache IOMemory store is. Avoid context parameter.

        :returns: An iterator over the triples.
        """
        LOG.debug("-- ProxyStore.triples(triple=%s, context=%s) --",
                  triple, context)

        Store.triples(self, triple)  # , context=None)

        # Refresh the cache before answering a read query.
        self._pull()

        return self._graph.store.triples(triple)  # , context=None)

    def __len__(self, context=None):
        """Number of statements in the store.

        :returns: The number of statements in the store.
        """
        # Refresh the cache so the count reflects the remote graph.
        self._pull()
        ret = len(self._graph)
        LOG.debug("******** __len__ : ProxyStore, nb statements %d", ret)
        return ret

    # ---------- Formula / Context Interfaces ----------
    # def contexts(self, triple=None):
    # Generator over all contexts in the graph. If triple is specified, a
    # generator over all contexts the triple is in.
    # def remove_context(self, identifier)
    # ---------- Formula / Context Interfaces ----------

    # ---------- Optional Transactional methods ----------
    def commit(self):
        """Sends the modifications to the server."""
        self._push()

    def rollback(self):
        """Cancel the modifications. Get the graph from the server."""
        self._pull()
    # ---------- Optional Transactional methods ----------

    def close(self, commit_pending_transaction=False):
        """This closes the database connection.

        :param commit_pending_transaction: Specifies whether to commit all
            pending transactions before closing (if the store is
            transactional).
        """
        LOG.debug("******** close (%s) ", commit_pending_transaction)

        self._identifier = None
        self._etags = None
        self.configuration = None
        self._format = None

        self._graph.close()

        self.httpserver.clear_credentials()

    def destroy(self, configuration):
        """This destroys the instance of the store identified by the
        configuration string.

        :param configuration: Configuration string identifying the store
        """
        LOG.debug("******** destroy (%s) ", configuration)

    def query(self, query, initNs=None, initBindings=None, queryGraph=None,
              **kw):
        """I provide SPARQL query processing as a store.

        I simply pass through the query to the underlying graph. This
        prevents an external SPARQL engine to make multiple accesses to
        that store, which can generate HTTP traffic.
        """
        # initNs and initBindings are invalid names for pylint (C0103),
        # but method `query` is specified by rdflib, so
        # pylint: disable=C0103
        if initNs is None:
            initNs = {}
        if initBindings is None:
            initBindings = {}
        self._pull()
        return self._graph.query(query, initNs=initNs,
                                 initBindings=initBindings, **kw)
class TestLevelDBGraphCore(unittest.TestCase):
    """Open/close/create/reopen lifecycle tests for the LevelDB-backed
    rdflib store."""

    def setUp(self):
        store = "LevelDB"
        self.graph = Graph(store=store)
        self.path = configString
        self.graph.open(self.path, create=True)

    def tearDown(self):
        self.graph.destroy(self.path)
        try:
            self.graph.close()
        except:
            # Best effort: the store may already be closed or destroyed.
            pass
        self._remove_path()

    def _remove_path(self):
        """Delete whatever remains at self.path (directory or file).

        Previously this stanza was copy-pasted in three places; it is now
        shared. Paths containing ':' are treated as non-file connection
        strings, mirroring the original branch structure.
        """
        if getattr(self, 'path', False) and self.path is not None:
            if os.path.exists(self.path):
                if os.path.isdir(self.path):
                    shutil.rmtree(self.path)
                elif len(self.path.split(':')) == 1:
                    os.unlink(self.path)
                else:
                    os.remove(self.path)

    def test_namespaces(self):
        # BUG FIX: the dc namespace URI previously had a doubled scheme
        # ("http://http://purl.org/dc/elements/1.1/").
        self.graph.bind("dc", "http://purl.org/dc/elements/1.1/")
        self.graph.bind("foaf", "http://xmlns.com/foaf/0.1/")
        self.assert_(len(list(self.graph.namespaces())) == 5)
        self.assert_(('foaf',
                      rdflib.term.URIRef(u'http://xmlns.com/foaf/0.1/'))
                     in list(self.graph.namespaces()))

    def test_readable_index(self):
        print(readable_index(111))

    def test_create_db(self):
        michel = rdflib.URIRef(u'michel')
        likes = rdflib.URIRef(u'likes')
        pizza = rdflib.URIRef(u'pizza')
        cheese = rdflib.URIRef(u'cheese')
        self.graph.add((michel, likes, pizza))
        self.graph.add((michel, likes, cheese))
        self.graph.commit()
        self.graph.store.close()
        self._remove_path()
        # A store re-created at the same path must start empty.
        self.graph.store.open(self.path, create=True)
        ntriples = self.graph.triples((None, None, None))
        self.assert_(len(list(ntriples)) == 0)

    def test_missing_db_exception(self):
        self.graph.store.close()
        self._remove_path()
        self.graph.store.open(self.path, create=True)
        ntriples = self.graph.triples((None, None, None))
        self.assert_(len(list(ntriples)) == 0)

    def test_reopening_db(self):
        michel = rdflib.URIRef(u'michel')
        likes = rdflib.URIRef(u'likes')
        pizza = rdflib.URIRef(u'pizza')
        cheese = rdflib.URIRef(u'cheese')
        self.graph.add((michel, likes, pizza))
        self.graph.add((michel, likes, cheese))
        self.graph.commit()
        self.graph.store.close()
        # Reopening with create=False must preserve committed triples.
        self.graph.store.open(self.path, create=False)
        ntriples = self.graph.triples((None, None, None))
        self.assert_(len(list(ntriples)) == 2)

    def test_reopening_missing_db(self):
        self.graph.store.close()
        self.assertRaises(ValueError, self.graph.store.open,
                          ('/tmp/NotAnExistingDB'), create=False)

    def test_isopen_db(self):
        self.assert_(self.graph.store.is_open() == True)
        self.graph.store.close()
        self.assert_(self.graph.store.is_open() == False)
class PostgreSQLStoreTests(unittest.TestCase):
    """N3 round-trip and REGEXTerm matching tests against the PostgreSQL store."""
    storetest = True
    store_name = "PostgreSQL"
    path = configString
    create = True

    def setUp(self):
        self.graph = Graph(store=self.store_name)
        # Fallback path creation when no configString-style path is set.
        # (Was ``isinstance(self.path, type(None))``; ``is None`` is the idiom.)
        if self.path is None:
            if self.store_name == "SQLite":
                # mkstemp returns (fd, path); the original assigned the whole
                # tuple to self.path and leaked the descriptor.
                fd, self.path = mkstemp(prefix='test', dir='/tmp')
                os.close(fd)
            else:
                self.path = mkdtemp(prefix='test', dir='/tmp')
        # NOTE(review): ``self.identifier`` is not defined anywhere in this
        # class -- presumably a subclass or mixin provides it; confirm before
        # running this class standalone.
        self.graph.store.identifier = self.identifier
        self.graph.open(self.path, create=self.create)

    def tearDown(self):
        # self.graph.destroy(self.path)
        self.graph.close()
        import os
        if hasattr(self, 'path') and self.path is not None:
            if os.path.exists(self.path):
                if os.path.isdir(self.path):
                    for f in os.listdir(self.path):
                        os.unlink(self.path + '/' + f)
                    os.rmdir(self.path)
                elif len(self.path.split(':')) == 1:
                    os.unlink(self.path)
                else:
                    os.remove(self.path)

    def test_PostgreSQL_testN3_store(self):
        testN3Store('PostgreSQL', configString)

    def testRegex(self):
        # raise SkipTest("Known issue.")
        g = self.graph
        g.parse(data=testN3, format="n3")
        try:
            # Grab the (single) implies statement; its subject/object are
            # quoted formulae.
            for s, p, o in g.triples((None, implies, None)):
                formulaA = s
                formulaB = o
            assert type(formulaA) == QuotedGraph and type(formulaB) == QuotedGraph
            a = URIRef('http://test/a')
            b = URIRef('http://test/b')
            c = URIRef('http://test/c')
            d = URIRef('http://test/d')
            universe = ConjunctiveGraph(g.store)
            # REGEX triple matching
            assert len(list(universe.triples(
                (None, REGEXTerm('.*22-rdf-syntax-ns.*'), None)))) == 1
            assert len(list(universe.triples(
                (None, REGEXTerm('.*'), None)))) == 3
            assert len(list(universe.triples(
                (REGEXTerm('.*formula.*$'), None, None)))) == 1
            assert len(list(universe.triples(
                (None, None, REGEXTerm('.*formula.*$'))))) == 1
            assert len(list(universe.triples(
                (None, REGEXTerm('.*implies$'), None)))) == 1
            for s, p, o in universe.triples(
                    (None, REGEXTerm('.*test.*'), None)):
                assert s == a
                assert o == c
            for s, p, o in formulaA.triples(
                    (None, REGEXTerm('.*type.*'), None)):
                assert o != c or isinstance(o, BNode)
            # REGEX context matching
            assert len(list(universe.contexts(
                (None, None, REGEXTerm('.*schema.*'))))) == 1
            assert len(list(universe.contexts(
                (None, REGEXTerm('.*'), None)))) == 3
            # test optimized interfaces
            assert len(list(g.store.subjects(RDF.type, [RDFS.Class, c]))) == 1
            for subj in g.store.subjects(RDF.type, [RDFS.Class, c]):
                assert isinstance(subj, BNode)
            assert len(list(g.store.subjects(implies, [REGEXTerm('.*')]))) == 1
            for subj in g.store.subjects(implies, [formulaB, RDFS.Class]):
                assert subj.identifier == formulaA.identifier
            assert len(list(g.store.subjects(
                REGEXTerm('.*'), [formulaB, c]))) == 2
            assert len(list(g.store.subjects(None, [formulaB, c]))) == 2
            assert len(list(g.store.subjects(None, [formulaB, c]))) == 2
            assert len(list(g.store.subjects(
                [REGEXTerm('.*rdf-syntax.*'), d], None))) == 2
            assert len(list(g.store.objects(None, RDF.type))) == 1
            assert len(list(g.store.objects(a, [d, RDF.type]))) == 1
            assert len(list(g.store.objects(a, [d]))) == 1
            assert len(list(g.store.objects(a, None))) == 1
            assert len(list(g.store.objects(a, [REGEXTerm('.*')]))) == 1
            assert len(list(g.store.objects([a, c], None))) == 1
        except:
            # Bare except is deliberate here: destroy the store on ANY
            # failure, then re-raise the original exception unchanged.
            g.store.destroy(configString)
            raise
class GraphTestCase(unittest.TestCase):
    """Core Graph behaviour: triple CRUD, graph set-operations, serialization."""
    store_name = 'default'
    path = None

    def setUp(self):
        self.graph = Graph(store=self.store_name)
        a_tmp_dir = mkdtemp()
        self.path = self.path or a_tmp_dir
        # Remember the mkdtemp directory so tearDown can delete it: the
        # original created one temp directory per test and never removed it.
        self._tmp_dir = a_tmp_dir
        self.graph.open(self.path)
        self.michel = URIRef(u'michel')
        self.tarek = URIRef(u'tarek')
        self.bob = URIRef(u'bob')
        self.likes = URIRef(u'likes')
        self.hates = URIRef(u'hates')
        self.pizza = URIRef(u'pizza')
        self.cheese = URIRef(u'cheese')

    def tearDown(self):
        self.graph.close()
        # Fix tempdir leak: remove the directory created in setUp.
        # ignore_errors so stores that already cleaned up don't fail teardown.
        shutil.rmtree(self._tmp_dir, ignore_errors=True)

    def addStuff(self):
        tarek = self.tarek
        michel = self.michel
        bob = self.bob
        likes = self.likes
        hates = self.hates
        pizza = self.pizza
        cheese = self.cheese
        self.graph.add((tarek, likes, pizza))
        self.graph.add((tarek, likes, cheese))
        self.graph.add((michel, likes, pizza))
        self.graph.add((michel, likes, cheese))
        self.graph.add((bob, likes, cheese))
        self.graph.add((bob, hates, pizza))
        self.graph.add((bob, hates, michel))  # gasp!

    def removeStuff(self):
        tarek = self.tarek
        michel = self.michel
        bob = self.bob
        likes = self.likes
        hates = self.hates
        pizza = self.pizza
        cheese = self.cheese
        self.graph.remove((tarek, likes, pizza))
        self.graph.remove((tarek, likes, cheese))
        self.graph.remove((michel, likes, pizza))
        self.graph.remove((michel, likes, cheese))
        self.graph.remove((bob, likes, cheese))
        self.graph.remove((bob, hates, pizza))
        self.graph.remove((bob, hates, michel))  # gasp!

    def testAdd(self):
        self.addStuff()

    def testRemove(self):
        self.addStuff()
        self.removeStuff()

    def testTriples(self):
        tarek = self.tarek
        michel = self.michel
        bob = self.bob
        likes = self.likes
        hates = self.hates
        pizza = self.pizza
        cheese = self.cheese
        asserte = self.assertEquals
        triples = self.graph.triples
        Any = None
        self.addStuff()
        # unbound subjects
        asserte(len(list(triples((Any, likes, pizza)))), 2)
        asserte(len(list(triples((Any, hates, pizza)))), 1)
        asserte(len(list(triples((Any, likes, cheese)))), 3)
        asserte(len(list(triples((Any, hates, cheese)))), 0)
        # unbound objects
        asserte(len(list(triples((michel, likes, Any)))), 2)
        asserte(len(list(triples((tarek, likes, Any)))), 2)
        asserte(len(list(triples((bob, hates, Any)))), 2)
        asserte(len(list(triples((bob, likes, Any)))), 1)
        # unbound predicates
        asserte(len(list(triples((michel, Any, cheese)))), 1)
        asserte(len(list(triples((tarek, Any, cheese)))), 1)
        asserte(len(list(triples((bob, Any, pizza)))), 1)
        asserte(len(list(triples((bob, Any, michel)))), 1)
        # unbound subject, objects
        asserte(len(list(triples((Any, hates, Any)))), 2)
        asserte(len(list(triples((Any, likes, Any)))), 5)
        # unbound predicates, objects
        asserte(len(list(triples((michel, Any, Any)))), 2)
        asserte(len(list(triples((bob, Any, Any)))), 3)
        asserte(len(list(triples((tarek, Any, Any)))), 2)
        # unbound subjects, predicates
        asserte(len(list(triples((Any, Any, pizza)))), 3)
        asserte(len(list(triples((Any, Any, cheese)))), 3)
        asserte(len(list(triples((Any, Any, michel)))), 1)
        # all unbound
        asserte(len(list(triples((Any, Any, Any)))), 7)
        self.removeStuff()
        asserte(len(list(triples((Any, Any, Any)))), 0)

    def testStatementNode(self):
        graph = self.graph
        from rdflib.term import Statement
        c = URIRef("http://example.org/foo#c")
        r = URIRef("http://example.org/foo#r")
        s = Statement((self.michel, self.likes, self.pizza), c)
        graph.add((s, RDF.value, r))
        self.assertEquals(r, graph.value(s, RDF.value))
        self.assertEquals(s, graph.value(predicate=RDF.value, object=r))

    def testGraphValue(self):
        from rdflib.graph import GraphValue
        graph = self.graph
        alice = URIRef("alice")
        bob = URIRef("bob")
        pizza = URIRef("pizza")
        cheese = URIRef("cheese")
        g1 = Graph()
        g1.add((alice, RDF.value, pizza))
        g1.add((bob, RDF.value, cheese))
        g1.add((bob, RDF.value, pizza))
        g2 = Graph()
        g2.add((bob, RDF.value, pizza))
        g2.add((bob, RDF.value, cheese))
        g2.add((alice, RDF.value, pizza))
        gv1 = GraphValue(store=graph.store, graph=g1)
        gv2 = GraphValue(store=graph.store, graph=g2)
        graph.add((gv1, RDF.value, gv2))
        v = graph.value(gv1)
        # print type(v)
        self.assertEquals(gv2, v)
        # print list(gv2)
        # print gv2.identifier
        graph.remove((gv1, RDF.value, gv2))

    def testConnected(self):
        graph = self.graph
        self.addStuff()
        self.assertEquals(True, graph.connected())
        jeroen = URIRef("jeroen")
        unconnected = URIRef("unconnected")
        graph.add((jeroen, self.likes, unconnected))
        self.assertEquals(False, graph.connected())

    def testSub(self):
        g1 = Graph()
        g2 = Graph()
        tarek = self.tarek
        michel = self.michel
        bob = self.bob
        likes = self.likes
        hates = self.hates
        pizza = self.pizza
        cheese = self.cheese
        g1.add((tarek, likes, pizza))
        g1.add((bob, likes, cheese))
        g2.add((bob, likes, cheese))
        g3 = g1 - g2
        self.assertEquals(len(g3), 1)
        self.assertEquals((tarek, likes, pizza) in g3, True)
        self.assertEquals((tarek, likes, cheese) in g3, False)
        self.assertEquals((bob, likes, cheese) in g3, False)
        g1 -= g2
        self.assertEquals(len(g1), 1)
        self.assertEquals((tarek, likes, pizza) in g1, True)
        self.assertEquals((tarek, likes, cheese) in g1, False)
        self.assertEquals((bob, likes, cheese) in g1, False)

    def testGraphAdd(self):
        g1 = Graph()
        g2 = Graph()
        tarek = self.tarek
        michel = self.michel
        bob = self.bob
        likes = self.likes
        hates = self.hates
        pizza = self.pizza
        cheese = self.cheese
        g1.add((tarek, likes, pizza))
        g2.add((bob, likes, cheese))
        g3 = g1 + g2
        self.assertEquals(len(g3), 2)
        self.assertEquals((tarek, likes, pizza) in g3, True)
        self.assertEquals((tarek, likes, cheese) in g3, False)
        self.assertEquals((bob, likes, cheese) in g3, True)
        g1 += g2
        self.assertEquals(len(g1), 2)
        self.assertEquals((tarek, likes, pizza) in g1, True)
        self.assertEquals((tarek, likes, cheese) in g1, False)
        self.assertEquals((bob, likes, cheese) in g1, True)

    def testGraphIntersection(self):
        g1 = Graph()
        g2 = Graph()
        tarek = self.tarek
        michel = self.michel
        bob = self.bob
        likes = self.likes
        hates = self.hates
        pizza = self.pizza
        cheese = self.cheese
        g1.add((tarek, likes, pizza))
        g1.add((michel, likes, cheese))
        g2.add((bob, likes, cheese))
        g2.add((michel, likes, cheese))
        g3 = g1 * g2
        self.assertEquals(len(g3), 1)
        self.assertEquals((tarek, likes, pizza) in g3, False)
        self.assertEquals((tarek, likes, cheese) in g3, False)
        self.assertEquals((bob, likes, cheese) in g3, False)
        self.assertEquals((michel, likes, cheese) in g3, True)
        g1 *= g2
        self.assertEquals(len(g1), 1)
        self.assertEquals((tarek, likes, pizza) in g1, False)
        self.assertEquals((tarek, likes, cheese) in g1, False)
        self.assertEquals((bob, likes, cheese) in g1, False)
        self.assertEquals((michel, likes, cheese) in g1, True)

    def testFinalNewline(self):
        """
        http://code.google.com/p/rdflib/issues/detail?id=5
        """
        failed = set()
        for p in rdflib.plugin.plugins(None, rdflib.plugin.Serializer):
            v = self.graph.serialize(format=p.name)
            lines = v.split("\n")
            if "\n" not in v or (lines[-1] != ''):
                failed.add(p.name)
        self.assertEqual(len(failed), 0,
                         "No final newline for formats: '%s'" % failed)

    def testConNeg(self):
        thread.start_new_thread(runHttpServer, tuple())
        # hang on a second while server starts
        time.sleep(1)
        self.graph.parse("http://localhost:12345/foo", format="xml")
        self.graph.parse("http://localhost:12345/foo", format="n3")
        self.graph.parse("http://localhost:12345/foo", format="nt")
class GraphTest(test.TestCase):
    """
    Testing the basic graph functionality.

    Heavily based on https://github.com/RDFLib/rdflib-postgresql/blob/master/test/graph_case.py
    """
    store_name = "Django"
    storetest = True
    path = ''
    create = True

    michel = URIRef(u'michel')
    tarek = URIRef(u'tarek')
    bob = URIRef(u'bob')
    likes = URIRef(u'likes')
    hates = URIRef(u'hates')
    pizza = URIRef(u'pizza')
    cheese = URIRef(u'cheese')

    def setUp(self):
        self.graph = Graph(store=self.store_name)
        self.graph.destroy(self.path)
        self.graph.open(self.path, create=self.create)

    def tearDown(self):
        self.graph.destroy(self.path)
        self.graph.close()

    def _fixture(self):
        # The seven statements shared by addStuff and removeStuff.
        return [
            (self.tarek, self.likes, self.pizza),
            (self.tarek, self.likes, self.cheese),
            (self.michel, self.likes, self.pizza),
            (self.michel, self.likes, self.cheese),
            (self.bob, self.likes, self.cheese),
            (self.bob, self.hates, self.pizza),
            (self.bob, self.hates, self.michel),
        ]

    def addStuff(self):
        for statement in self._fixture():
            self.graph.add(statement)
        self.graph.commit()

    def removeStuff(self):
        for statement in self._fixture():
            self.graph.remove(statement)

    def testAdd(self):
        self.addStuff()

    def testRemove(self):
        self.addStuff()
        self.removeStuff()

    def testTriples(self):
        triples = self.graph.triples
        Any = None
        self.addStuff()

        def count(pattern):
            # Number of triples matching the (s, p, o) pattern.
            return len(list(triples(pattern)))

        # unbound subjects
        self.assertEquals(count((Any, self.likes, self.pizza)), 2)
        self.assertEquals(count((Any, self.hates, self.pizza)), 1)
        self.assertEquals(count((Any, self.likes, self.cheese)), 3)
        self.assertEquals(count((Any, self.hates, self.cheese)), 0)
        # unbound objects
        self.assertEquals(count((self.michel, self.likes, Any)), 2)
        self.assertEquals(count((self.tarek, self.likes, Any)), 2)
        self.assertEquals(count((self.bob, self.hates, Any)), 2)
        self.assertEquals(count((self.bob, self.likes, Any)), 1)
        # unbound predicates
        self.assertEquals(count((self.michel, Any, self.cheese)), 1)
        self.assertEquals(count((self.tarek, Any, self.cheese)), 1)
        self.assertEquals(count((self.bob, Any, self.pizza)), 1)
        self.assertEquals(count((self.bob, Any, self.michel)), 1)
        # unbound subject, objects
        self.assertEquals(count((Any, self.hates, Any)), 2)
        self.assertEquals(count((Any, self.likes, Any)), 5)
        # unbound predicates, objects
        self.assertEquals(count((self.michel, Any, Any)), 2)
        self.assertEquals(count((self.bob, Any, Any)), 3)
        self.assertEquals(count((self.tarek, Any, Any)), 2)
        # unbound subjects, predicates
        self.assertEquals(count((Any, Any, self.pizza)), 3)
        self.assertEquals(count((Any, Any, self.cheese)), 3)
        self.assertEquals(count((Any, Any, self.michel)), 1)
        # all unbound
        self.assertEquals(count((Any, Any, Any)), 7)
        self.removeStuff()
        self.assertEquals(count((Any, Any, Any)), 0)

    def testConnected(self):
        graph = self.graph
        self.addStuff()
        self.assertEquals(True, graph.connected())
        jeroen = URIRef("jeroen")
        unconnected = URIRef("unconnected")
        graph.add((jeroen, self.likes, unconnected))
        self.assertEquals(False, graph.connected())

    def testSub(self):
        g1 = Graph()
        g2 = Graph()
        g1.add((self.tarek, self.likes, self.pizza))
        g1.add((self.bob, self.likes, self.cheese))
        g2.add((self.bob, self.likes, self.cheese))
        g3 = g1 - g2
        self.assertEquals(len(g3), 1)
        self.assertTrue((self.tarek, self.likes, self.pizza) in g3)
        self.assertFalse((self.tarek, self.likes, self.cheese) in g3)
        self.assertFalse((self.bob, self.likes, self.cheese) in g3)
        g1 -= g2
        self.assertEquals(len(g1), 1)
        self.assertTrue((self.tarek, self.likes, self.pizza) in g1)
        self.assertFalse((self.tarek, self.likes, self.cheese) in g1)
        self.assertFalse((self.bob, self.likes, self.cheese) in g1)

    def testGraphAdd(self):
        g1 = Graph()
        g2 = Graph()
        g1.add((self.tarek, self.likes, self.pizza))
        g2.add((self.bob, self.likes, self.cheese))
        g3 = g1 + g2
        self.assertEquals(len(g3), 2)
        self.assertTrue((self.tarek, self.likes, self.pizza) in g3)
        self.assertFalse((self.tarek, self.likes, self.cheese) in g3)
        self.assertTrue((self.bob, self.likes, self.cheese) in g3)
        g1 += g2
        self.assertEquals(len(g1), 2)
        self.assertTrue((self.tarek, self.likes, self.pizza) in g1)
        self.assertFalse((self.tarek, self.likes, self.cheese) in g1)
        self.assertTrue((self.bob, self.likes, self.cheese) in g1)

    def testGraphIntersection(self):
        g1 = Graph()
        g2 = Graph()
        g1.add((self.tarek, self.likes, self.pizza))
        g1.add((self.michel, self.likes, self.cheese))
        g2.add((self.bob, self.likes, self.cheese))
        g2.add((self.michel, self.likes, self.cheese))
        g3 = g1 * g2
        self.assertEquals(len(g3), 1)
        self.assertFalse((self.tarek, self.likes, self.pizza) in g3)
        self.assertFalse((self.tarek, self.likes, self.cheese) in g3)
        self.assertFalse((self.bob, self.likes, self.cheese) in g3)
        self.assertTrue((self.michel, self.likes, self.cheese) in g3)
        g1 *= g2
        self.assertEquals(len(g1), 1)
        self.assertFalse((self.tarek, self.likes, self.pizza) in g1)
        self.assertFalse((self.tarek, self.likes, self.cheese) in g1)
        self.assertFalse((self.bob, self.likes, self.cheese) in g1)
        self.assertTrue((self.michel, self.likes, self.cheese) in g1)
class TestKyotoCabinetGraphCore(unittest.TestCase):
    """Open/close/create lifecycle tests for the KyotoCabinet-backed store."""

    def setUp(self):
        store = "KyotoCabinet"
        self.graph = Graph(store=store)
        self.path = configString
        self.graph.open(self.path, create=True)

    def tearDown(self):
        self.graph.destroy(self.path)
        try:
            self.graph.close()
        except Exception:
            # Individual tests may already have closed the store; a second
            # close must not fail the teardown.  (Was a bare ``except:``.)
            pass
        self._remove_path()

    def _remove_path(self):
        # Shared cleanup helper: the original repeated this block verbatim in
        # tearDown, test_create_db and test_missing_db_exception.
        # Deletes the directory contents file-by-file (the original pattern),
        # a plain file, or falls through to os.remove for ':'-style paths.
        if getattr(self, "path", False) and self.path is not None:
            if os.path.exists(self.path):
                if os.path.isdir(self.path):
                    for f in os.listdir(self.path):
                        os.unlink(self.path + "/" + f)
                    os.rmdir(self.path)
                elif len(self.path.split(":")) == 1:
                    os.unlink(self.path)
                else:
                    os.remove(self.path)

    def test_namespaces(self):
        # NOTE(review): the "http://http://" scheme below looks like a typo,
        # but it is a runtime string -- preserved byte-for-byte; confirm
        # upstream before fixing.
        self.graph.bind("dc", "http://http://purl.org/dc/elements/1.1/")
        self.graph.bind("foaf", "http://xmlns.com/foaf/0.1/")
        self.assert_(len(list(self.graph.namespaces())) == 5)
        self.assert_(
            ("foaf", rdflib.term.URIRef(u"http://xmlns.com/foaf/0.1/"))
            in list(self.graph.namespaces()))

    def test_play_journal(self):
        self.assertRaises(NotImplementedError,
                          self.graph.store.play_journal,
                          {"graph": self.graph})

    def test_readable_index(self):
        print(readable_index(111))

    def test_create_db(self):
        michel = rdflib.URIRef(u"michel")
        likes = rdflib.URIRef(u"likes")
        pizza = rdflib.URIRef(u"pizza")
        cheese = rdflib.URIRef(u"cheese")
        self.graph.add((michel, likes, pizza))
        self.graph.add((michel, likes, cheese))
        self.graph.commit()
        self.graph.store.close()
        self._remove_path()
        # Re-creating the store must yield an empty graph.
        self.graph.store.open(self.path, create=True)
        ntriples = self.graph.triples((None, None, None))
        self.assert_(len(list(ntriples)) == 0)

    def test_missing_db_exception(self):
        self.graph.store.close()
        self._remove_path()
        self.graph.store.open(self.path, create=True)
        ntriples = self.graph.triples((None, None, None))
        self.assert_(len(list(ntriples)) == 0)

    def test_reopening_db(self):
        michel = rdflib.URIRef(u"michel")
        likes = rdflib.URIRef(u"likes")
        pizza = rdflib.URIRef(u"pizza")
        cheese = rdflib.URIRef(u"cheese")
        self.graph.add((michel, likes, pizza))
        self.graph.add((michel, likes, cheese))
        self.graph.commit()
        self.graph.store.close()
        # Reopening without create must keep the two committed triples.
        self.graph.store.open(self.path, create=False)
        ntriples = self.graph.triples((None, None, None))
        self.assert_(len(list(ntriples)) == 2)

    def test_reopening_missing_db(self):
        self.graph.store.close()
        self.assertRaises(ValueError, self.graph.store.open,
                          ("/tmp/NotAnExistingDB"), create=False)

    def test_isopen_db(self):
        self.assert_(self.graph.store.is_open() == True)
        self.graph.store.close()
        self.assert_(self.graph.store.is_open() == False)
class GraphTest(TestCase):
    """
    Testing the basic graph functionality.

    Heavily based on https://github.com/RDFLib/rdflib-postgresql/blob/master/test/graph_case.py
    """  # noqa: E501
    store_name = "Django"
    storetest = True
    path = ''
    create = True

    michel = URIRef(u'michel')
    tarek = URIRef(u'tarek')
    bob = URIRef(u'bob')
    likes = URIRef(u'likes')
    hates = URIRef(u'hates')
    pizza = URIRef(u'pizza')
    cheese = URIRef(u'cheese')

    def setUp(self):
        self.graph = Graph(store=self.store_name)
        self.graph.destroy(self.path)
        self.graph.open(self.path, create=self.create)

    def tearDown(self):
        self.graph.destroy(self.path)
        self.graph.close()

    def _statements(self):
        # The seven fixture statements used by both addStuff and removeStuff.
        return [
            (self.tarek, self.likes, self.pizza),
            (self.tarek, self.likes, self.cheese),
            (self.michel, self.likes, self.pizza),
            (self.michel, self.likes, self.cheese),
            (self.bob, self.likes, self.cheese),
            (self.bob, self.hates, self.pizza),
            (self.bob, self.hates, self.michel),
        ]

    def addStuff(self):
        for spo in self._statements():
            self.graph.add(spo)
        self.graph.commit()

    def removeStuff(self):
        for spo in self._statements():
            self.graph.remove(spo)

    def testAdd(self):
        self.addStuff()

    def testRemove(self):
        self.addStuff()
        self.removeStuff()

    def testTriples(self):
        triples = self.graph.triples
        Any = None
        self.addStuff()

        def matches(pattern):
            # Count triples matching an (s, p, o) pattern; None is a wildcard.
            return len(list(triples(pattern)))

        # unbound subjects
        self.assertEquals(matches((Any, self.likes, self.pizza)), 2)
        self.assertEquals(matches((Any, self.hates, self.pizza)), 1)
        self.assertEquals(matches((Any, self.likes, self.cheese)), 3)
        self.assertEquals(matches((Any, self.hates, self.cheese)), 0)
        # unbound objects
        self.assertEquals(matches((self.michel, self.likes, Any)), 2)
        self.assertEquals(matches((self.tarek, self.likes, Any)), 2)
        self.assertEquals(matches((self.bob, self.hates, Any)), 2)
        self.assertEquals(matches((self.bob, self.likes, Any)), 1)
        # unbound predicates
        self.assertEquals(matches((self.michel, Any, self.cheese)), 1)
        self.assertEquals(matches((self.tarek, Any, self.cheese)), 1)
        self.assertEquals(matches((self.bob, Any, self.pizza)), 1)
        self.assertEquals(matches((self.bob, Any, self.michel)), 1)
        # unbound subject, objects
        self.assertEquals(matches((Any, self.hates, Any)), 2)
        self.assertEquals(matches((Any, self.likes, Any)), 5)
        # unbound predicates, objects
        self.assertEquals(matches((self.michel, Any, Any)), 2)
        self.assertEquals(matches((self.bob, Any, Any)), 3)
        self.assertEquals(matches((self.tarek, Any, Any)), 2)
        # unbound subjects, predicates
        self.assertEquals(matches((Any, Any, self.pizza)), 3)
        self.assertEquals(matches((Any, Any, self.cheese)), 3)
        self.assertEquals(matches((Any, Any, self.michel)), 1)
        # all unbound
        self.assertEquals(matches((Any, Any, Any)), 7)
        self.removeStuff()
        self.assertEquals(matches((Any, Any, Any)), 0)

    def testConnected(self):
        graph = self.graph
        self.addStuff()
        self.assertEquals(True, graph.connected())
        jeroen = URIRef("jeroen")
        unconnected = URIRef("unconnected")
        graph.add((jeroen, self.likes, unconnected))
        self.assertEquals(False, graph.connected())

    def testSub(self):
        g1 = Graph()
        g2 = Graph()
        g1.add((self.tarek, self.likes, self.pizza))
        g1.add((self.bob, self.likes, self.cheese))
        g2.add((self.bob, self.likes, self.cheese))
        g3 = g1 - g2
        self.assertEquals(len(g3), 1)
        self.assertTrue((self.tarek, self.likes, self.pizza) in g3)
        self.assertFalse((self.tarek, self.likes, self.cheese) in g3)
        self.assertFalse((self.bob, self.likes, self.cheese) in g3)
        g1 -= g2
        self.assertEquals(len(g1), 1)
        self.assertTrue((self.tarek, self.likes, self.pizza) in g1)
        self.assertFalse((self.tarek, self.likes, self.cheese) in g1)
        self.assertFalse((self.bob, self.likes, self.cheese) in g1)

    def testGraphAdd(self):
        g1 = Graph()
        g2 = Graph()
        g1.add((self.tarek, self.likes, self.pizza))
        g2.add((self.bob, self.likes, self.cheese))
        g3 = g1 + g2
        self.assertEquals(len(g3), 2)
        self.assertTrue((self.tarek, self.likes, self.pizza) in g3)
        self.assertFalse((self.tarek, self.likes, self.cheese) in g3)
        self.assertTrue((self.bob, self.likes, self.cheese) in g3)
        g1 += g2
        self.assertEquals(len(g1), 2)
        self.assertTrue((self.tarek, self.likes, self.pizza) in g1)
        self.assertFalse((self.tarek, self.likes, self.cheese) in g1)
        self.assertTrue((self.bob, self.likes, self.cheese) in g1)

    def testGraphIntersection(self):
        g1 = Graph()
        g2 = Graph()
        g1.add((self.tarek, self.likes, self.pizza))
        g1.add((self.michel, self.likes, self.cheese))
        g2.add((self.bob, self.likes, self.cheese))
        g2.add((self.michel, self.likes, self.cheese))
        g3 = g1 * g2
        self.assertEquals(len(g3), 1)
        self.assertFalse((self.tarek, self.likes, self.pizza) in g3)
        self.assertFalse((self.tarek, self.likes, self.cheese) in g3)
        self.assertFalse((self.bob, self.likes, self.cheese) in g3)
        self.assertTrue((self.michel, self.likes, self.cheese) in g3)
        g1 *= g2
        self.assertEquals(len(g1), 1)
        self.assertFalse((self.tarek, self.likes, self.pizza) in g1)
        self.assertFalse((self.tarek, self.likes, self.cheese) in g1)
        self.assertFalse((self.bob, self.likes, self.cheese) in g1)
        self.assertTrue((self.michel, self.likes, self.cheese) in g1)
class BaseTests(object):
    """Shared test body for the Agamemnon-backed rdflib store.

    Mixed into concrete ``unittest.TestCase`` subclasses; subclasses are
    expected to supply ``self.settings1`` / ``self.settings2`` (keys into the
    YAML config) — TODO confirm against the subclasses, they are not defined
    here.
    """

    store_name = 'Agamemnon'

    def setUp(self):
        """Open two graphs against the configured store and bind fixtures."""
        with open(TEST_CONFIG_FILE) as f:
            # NOTE(review): yaml.load without an explicit Loader executes
            # arbitrary YAML tags; prefer yaml.safe_load for config files.
            settings = yaml.load(f)
        self.graph1 = Graph(store=self.store_name)
        self.graph2 = Graph(store=self.store_name)
        self.graph1.open(settings[self.settings1], True)
        self.graph2.open(settings[self.settings2], True)
        # Namespaces: subjects (people), predicates (relations), objects (things).
        self.oNS = Namespace("http://www.example.org/rdf/things#")
        self.sNS = Namespace("http://www.example.org/rdf/people#")
        self.pNS = Namespace("http://www.example.org/rdf/relations/")
        self.graph1.bind('people', self.sNS)
        self.graph1.bind('relations', self.pNS)
        self.graph1.bind('things', self.oNS)
        self.graph2.bind('people', self.sNS)
        self.graph2.bind('relations', self.pNS)
        self.graph2.bind('things', self.oNS)
        self.michel = self.sNS.michel
        self.tarek = self.sNS.tarek
        self.alice = self.sNS.alice
        self.bob = self.sNS.bob
        self.likes = self.pNS.likes
        self.hates = self.pNS.hates
        self.named = self.pNS.named
        self.pizza = self.oNS.pizza
        self.cheese = self.oNS.cheese

    def tearDown(self):
        """Close both graphs and drop their backing data stores."""
        self.graph1.close()
        self.graph2.close()
        self.graph1.store.data_store.drop()
        self.graph2.store.data_store.drop()

    def addStuff(self, graph):
        """Populate *graph* with the 8-triple fixture used by the tests."""
        graph.add((self.tarek, self.likes, self.pizza))
        graph.add((self.tarek, self.likes, self.cheese))
        graph.add((self.michel, self.likes, self.pizza))
        graph.add((self.michel, self.likes, self.cheese))
        graph.add((self.bob, self.likes, self.cheese))
        graph.add((self.bob, self.hates, self.pizza))
        graph.add((self.bob, self.hates, self.michel))  # gasp!
        graph.add((self.bob, self.named, Literal("Bob")))

    def removeStuff(self, graph):
        """Remove every triple from *graph*."""
        graph.remove((None, None, None))

    def testBind(self):
        """Prefix<->namespace bindings round-trip through the store."""
        store = self.graph1.store
        self.assertEqual(store.namespace(""),
                         Namespace("http://www.example.org/rdf/"))
        self.assertEqual(store.namespace('people'), self.sNS)
        self.assertEqual(store.namespace('relations'), self.pNS)
        self.assertEqual(store.namespace('things'), self.oNS)
        self.assertEqual(store.namespace('blech'), None)
        self.assertEqual(
            "", store.prefix(Namespace("http://www.example.org/rdf/")))
        self.assertEqual('people', store.prefix(self.sNS))
        self.assertEqual('relations', store.prefix(self.pNS))
        self.assertEqual('things', store.prefix(self.oNS))
        self.assertEqual(None, store.prefix("blech"))
        self.assertEqual(len(list(self.graph1.namespaces())), 7)

    def testRelationshipToUri(self):
        """Relationship type names map to URIs, honouring bound prefixes."""
        uri = self.graph1.store.rel_type_to_ident('likes')
        self.assertEqual(uri, URIRef("http://www.example.org/rdf/likes"))
        # Unbound prefix passes through verbatim...
        uri = self.graph1.store.rel_type_to_ident('emotions:likes')
        self.assertEqual(uri, URIRef("emotions:likes"))
        # ...and expands once the prefix is bound.
        self.graph1.bind('emotions', 'http://www.emo.org/')
        uri = self.graph1.store.rel_type_to_ident('emotions:likes')
        self.assertEqual(uri, URIRef("http://www.emo.org/likes"))

    def testNodeToUri(self):
        """Store nodes map to URIs using their type's bound namespace."""
        node = self.graph1.store._ds.create_node('blah', 'bleh')
        uri = self.graph1.store.node_to_ident(node)
        self.assertEqual(uri, URIRef("http://www.example.org/rdf/blah#bleh"))
        self.graph1.bind("bibble", "http://www.bibble.com/rdf/bibble#")
        node = self.graph1.store._ds.create_node('bibble', 'babble')
        uri = self.graph1.store.node_to_ident(node)
        self.assertEqual(uri, URIRef("http://www.bibble.com/rdf/bibble#babble"))

    def testUriToRelationship(self):
        """URIs map back to relationship type names."""
        rel_type = self.graph1.store.ident_to_rel_type(
            URIRef("http://www.example.org/rdf/likes"))
        self.assertEqual(rel_type, 'likes')
        # An unbound prefix yields a generated UUID prefix (underscored).
        rel_type = self.graph1.store.ident_to_rel_type(
            URIRef('emotions:likes'))
        prefix, rel_type = rel_type.split(":", 1)
        uuid.UUID(prefix.replace("_", "-"))  # raises if not a valid UUID
        self.assertEqual(rel_type, "likes")
        self.graph1.bind('emotions', 'http://www.emo.org/')
        rel_type = self.graph1.store.ident_to_rel_type(
            URIRef("http://www.emo.org/likes"))
        self.assertEqual(rel_type, 'emotions:likes')

    def testUriToNode(self):
        """URIs map back to store nodes, bound or not."""
        # test unbound uri
        uri = URIRef("http://www.example.org/rdf/blah#bleh")
        node = self.graph1.store.ident_to_node(uri, True)
        uuid.UUID(node.type.replace("_", "-"))  # generated type is a UUID
        self.assertEqual(node.key, "bleh")
        # test unbound uri with trailing /
        uri = URIRef("http://www.example.org/blah/bleh/")
        node = self.graph1.store.ident_to_node(uri, True)
        uuid.UUID(node.type.replace("_", "-"))
        self.assertEqual(node.key, "bleh/")
        # test bound uri
        self.graph1.bind("bibble", "http://www.bibble.com/rdf/bibble#")
        uri = URIRef("http://www.bibble.com/rdf/bibble#babble")
        node = self.graph1.store.ident_to_node(uri, True)
        self.assertEqual(node.type, "bibble")
        self.assertEqual(node.key, "babble")
        # make sure if we reference a predicate as a subject or object, we will
        # still be able recover the correct uri
        uri = URIRef("http://www.example.org/rdf/doit")
        node = self.graph1.store.ident_to_node(uri, True)
        self.assertEqual(self.graph1.store.node_to_ident(node), uri)

    def testAdd(self):
        self.addStuff(self.graph1)

    def testRemove(self):
        self.addStuff(self.graph1)
        self.removeStuff(self.graph1)

    def testTriples(self):
        """triples() pattern matching with every combination of wildcards."""
        tarek = self.tarek
        michel = self.michel
        bob = self.bob
        likes = self.likes
        hates = self.hates
        pizza = self.pizza
        cheese = self.cheese
        asserte = self.assertEqual
        triples = self.graph1.triples
        named = self.named
        Any = None
        self.addStuff(self.graph1)

        # unbound subjects
        asserte(len(list(triples((Any, likes, pizza)))), 2)
        asserte(len(list(triples((Any, hates, pizza)))), 1)
        asserte(len(list(triples((Any, likes, cheese)))), 3)
        asserte(len(list(triples((Any, hates, cheese)))), 0)
        asserte(len(list(triples((Any, named, Literal("Bob"))))), 1)

        # unbound objects
        asserte(len(list(triples((michel, likes, Any)))), 2)
        asserte(len(list(triples((tarek, likes, Any)))), 2)
        asserte(len(list(triples((bob, hates, Any)))), 2)
        asserte(len(list(triples((bob, likes, Any)))), 1)
        asserte(len(list(triples((bob, named, Any)))), 1)

        # unbound predicates
        asserte(len(list(triples((michel, Any, cheese)))), 1)
        asserte(len(list(triples((tarek, Any, cheese)))), 1)
        asserte(len(list(triples((bob, Any, pizza)))), 1)
        asserte(len(list(triples((bob, Any, michel)))), 1)
        asserte(len(list(triples((bob, Any, Literal("Bob"))))), 1)

        # unbound subject, objects
        asserte(len(list(triples((Any, hates, Any)))), 2)
        asserte(len(list(triples((Any, likes, Any)))), 5)
        asserte(len(list(triples((Any, named, Any)))), 1)

        # unbound predicates, objects
        asserte(len(list(triples((michel, Any, Any)))), 2)
        asserte(len(list(triples((bob, Any, Any)))), 4)
        asserte(len(list(triples((tarek, Any, Any)))), 2)

        # unbound subjects, predicates
        asserte(len(list(triples((Any, Any, pizza)))), 3)
        asserte(len(list(triples((Any, Any, cheese)))), 3)
        asserte(len(list(triples((Any, Any, michel)))), 1)

        # all unbound
        asserte(len(list(triples((Any, Any, Any)))), 8)
        self.removeStuff(self.graph1)
        asserte(len(list(triples((Any, Any, Any)))), 0)

    #def testStatementNode(self):
        #graph = self.graph1
        #from rdflib.term import Statement
        #c = URIRef("http://example.org/foo#c")
        #r = Literal("blah")
        #s = Statement((self.michel, self.likes, self.pizza), None)
        #graph.add((s, RDF.value, r))
        #self.assertEquals(r, graph.value(s, RDF.value))
        #self.assertEquals(s, graph.value(predicate=RDF.value, object=r))

    #def testGraphValue(self):
        #from rdflib.graph import GraphValue
        #graph = self.graph1
        #g1 = Graph(store=self.store_name)
        #g1.open(self.settings1, True)
        #g1.add((self.alice, RDF.value, self.pizza))
        #g1.add((self.bob, RDF.value, self.cheese))
        #g1.add((self.bob, RDF.value, self.pizza))
        #g2 = Graph(store=self.store_name)
        #g2.open(self.settings2, True)
        #g2.add((self.bob, RDF.value, self.pizza))
        #g2.add((self.bob, RDF.value, self.cheese))
        #g2.add((self.alice, RDF.value, self.pizza))
        #gv1 = GraphValue(store=graph.store, graph=g1)
        #gv2 = GraphValue(store=graph.store, graph=g2)
        #graph.add((gv1, RDF.value, gv2))
        #v = graph.value(gv1)
        ##print type(v)
        #self.assertEquals(gv2, v)
        ##print list(gv2)
        ##print gv2.identifier
        #graph.remove((gv1, RDF.value, gv2))

    def testConnected(self):
        """Connectivity check, with and without reference-node filtering."""
        graph = self.graph1
        self.addStuff(self.graph1)
        self.assertEqual(True, graph.connected())
        jeroen = self.sNS.jeroen
        unconnected = self.oNS.unconnected
        graph.add((jeroen, self.likes, unconnected))
        self.assertEqual(False, graph.connected())
        # sanity check that we are ignoring reference nodes
        self.assertTrue(graph.store.ignore_reference_nodes)
        # if we don't ignore reference nodes, the graph should be connected
        graph.store.ignore_reference_nodes = False
        self.assertEqual(True, graph.connected())

    def testSub(self):
        """Graph difference (- and -=) against the store-backed graphs."""
        g1 = self.graph1
        g2 = self.graph2
        tarek = self.tarek
        bob = self.bob
        likes = self.likes
        pizza = self.pizza
        cheese = self.cheese
        g1.add((tarek, likes, pizza))
        g1.add((bob, likes, cheese))
        g2.add((bob, likes, cheese))
        g3 = g1 - g2
        self.assertEqual(len(g3), 1)
        self.assertEqual((tarek, likes, pizza) in g3, True)
        self.assertEqual((tarek, likes, cheese) in g3, False)
        self.assertEqual((bob, likes, cheese) in g3, False)
        g1 -= g2
        self.assertEqual(len(g1), 1)
        self.assertEqual((tarek, likes, pizza) in g1, True)
        self.assertEqual((tarek, likes, cheese) in g1, False)
        self.assertEqual((bob, likes, cheese) in g1, False)

    def testGraphAdd(self):
        """Graph union (+ and +=) against the store-backed graphs."""
        g1 = self.graph1
        g2 = self.graph2
        tarek = self.tarek
        bob = self.bob
        likes = self.likes
        pizza = self.pizza
        cheese = self.cheese
        g1.add((tarek, likes, pizza))
        g2.add((bob, likes, cheese))
        g3 = g1 + g2
        self.assertEqual(len(g3), 2)
        self.assertEqual((tarek, likes, pizza) in g3, True)
        self.assertEqual((tarek, likes, cheese) in g3, False)
        self.assertEqual((bob, likes, cheese) in g3, True)
        g1 += g2
        self.assertEqual(len(g1), 2)
        self.assertEqual((tarek, likes, pizza) in g1, True)
        self.assertEqual((tarek, likes, cheese) in g1, False)
        self.assertEqual((bob, likes, cheese) in g1, True)

    def testGraphIntersection(self):
        """Graph intersection (* and *=) against the store-backed graphs."""
        g1 = self.graph1
        g2 = self.graph2
        tarek = self.tarek
        michel = self.michel
        bob = self.bob
        likes = self.likes
        pizza = self.pizza
        cheese = self.cheese
        g1.add((tarek, likes, pizza))
        g1.add((michel, likes, cheese))
        g2.add((bob, likes, cheese))
        g2.add((michel, likes, cheese))
        g3 = g1 * g2
        self.assertEqual(len(g3), 1)
        self.assertEqual((tarek, likes, pizza) in g3, False)
        self.assertEqual((tarek, likes, cheese) in g3, False)
        self.assertEqual((bob, likes, cheese) in g3, False)
        self.assertEqual((michel, likes, cheese) in g3, True)
        g1 *= g2
        self.assertEqual(len(g1), 1)
        self.assertEqual((tarek, likes, pizza) in g1, False)
        self.assertEqual((tarek, likes, cheese) in g1, False)
        self.assertEqual((bob, likes, cheese) in g1, False)
        self.assertEqual((michel, likes, cheese) in g1, True)

    def testSerialize(self):
        """Round-trip serialize/parse: graph2 must equal graph1 per format."""
        node = self.graph1.store.ident_to_node(self.pizza, True)
        log.info("Pizza Attr: %s" % node.attributes)
        parse_serial_pairs = [
            ('xml', 'xml'),
            # we will add more once we support context aware graphs
        ]
        for parse, serial in parse_serial_pairs:
            self.addStuff(self.graph1)
            v = self.graph1.serialize(format=serial)
            self.graph2.parse(data=v, format=parse)
            for triple in self.graph1:
                self.assertTrue(triple in self.graph2)
            for triple in self.graph2:
                self.assertTrue(triple in self.graph1)
            self.graph1.remove((None, None, None))
            self.graph2.remove((None, None, None))

    def testQuery(self):
        """SPARQL query over parsed RDF/XML returns the expected single row."""
        # parse from string
        # borrowed from http://en.wikipedia.org/wiki/Resource_Description_Framework
        rdf = """
<rdf:RDF
 xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
 xmlns:foaf="http://xmlns.com/foaf/0.1/"
 xmlns:dc="http://purl.org/dc/elements/1.1/">
  <rdf:Description rdf:about="http://en.wikipedia.org/wiki/Tony_Benn">
    <dc:title>Tony Benn</dc:title>
    <dc:publisher>Wikipedia</dc:publisher>
    <foaf:primaryTopic>
      <foaf:Person>
        <foaf:name>Tony Benn</foaf:name>
      </foaf:Person>
    </foaf:primaryTopic>
  </rdf:Description>
</rdf:RDF>
"""
        self.graph1.parse(data=rdf)
        rdflib.plugin.register('sparql', rdflib.query.Processor,
                               'rdfextras.sparql.processor', 'Processor')
        rdflib.plugin.register('sparql', rdflib.query.Result,
                               'rdfextras.sparql.query', 'SPARQLQueryResult')
        rows = self.graph1.query("""
            SELECT ?a
            WHERE { ?a foaf:primaryTopic ?b .
                    ?b foaf:name "Tony Benn" }
            """, initNs=dict(self.graph1.namespaces()))
        self.assertEqual(len(rows), 1)

    def testParse(self):
        """Parse the W3C RDF Core test documents plus a real-world FOAF file."""
        # examples from w3c
        # TODO: this fails because of query string in url
        #self.graph1.parse("http://www.w3.org/2000/10/rdf-tests/rdfcore/amp-in-url/test001.rdf")
        self.graph1.parse(
            "http://www.w3.org/2000/10/rdf-tests/rdfcore/datatypes/test001.rdf"
        )
        # TODO: this fails due to type parsing, probably an rdflib problem
        #self.graph1.parse("http://www.w3.org/2000/10/rdf-tests/rdfcore/datatypes/test002.rdf")
        self.graph1.parse(
            "http://www.w3.org/2000/10/rdf-tests/rdfcore/rdf-element-not-mandatory/test001.rdf"
        )
        self.graph1.parse(
            "http://www.w3.org/2000/10/rdf-tests/rdfcore/rdfms-reification-required/test001.rdf"
        )
        self.graph1.parse(
            "http://www.w3.org/2000/10/rdf-tests/rdfcore/rdfms-uri-substructure/test001.rdf"
        )
        self.graph1.parse(
            "http://www.w3.org/2000/10/rdf-tests/rdfcore/rdfms-xmllang/test001.rdf"
        )
        self.graph1.parse(
            "http://www.w3.org/2000/10/rdf-tests/rdfcore/rdfms-xmllang/test002.rdf"
        )
        self.graph1.parse(
            "http://www.w3.org/2000/10/rdf-tests/rdfcore/rdfms-xmllang/test003.rdf"
        )
        self.graph1.parse(
            "http://www.w3.org/2000/10/rdf-tests/rdfcore/rdfms-xmllang/test004.rdf"
        )
        self.graph1.parse(
            "http://www.w3.org/2000/10/rdf-tests/rdfcore/rdfms-xmllang/test005.rdf"
        )
        self.graph1.parse(
            "http://www.w3.org/2000/10/rdf-tests/rdfcore/rdfms-xmllang/test006.rdf"
        )
        self.graph1.parse(
            "http://www.w3.org/2000/10/rdf-tests/rdfcore/unrecognised-xml-attributes/test001.rdf"
        )
        self.graph1.parse(
            "http://www.w3.org/2000/10/rdf-tests/rdfcore/unrecognised-xml-attributes/test002.rdf"
        )
        self.graph1.parse(
            "http://www.w3.org/2000/10/rdf-tests/rdfcore/xml-canon/test001.rdf"
        )
        # additional examples for the fun of it
        self.graph1.parse("http://bigasterisk.com/foaf.rdf")
        #self.graph1.parse("http://www.w3.org/People/Berners-Lee/card.rdf")
        #self.graph1.parse("http://danbri.livejournal.com/data/foaf")
        self.graph1.serialize("serialized.rdf")