def test_it(self):
    # Build a four-index catalog, index six Content documents and check
    # that self.query reports two hits, docids 4 and 5, sorted by 'name'.
    from repoze.catalog.catalog import Catalog
    from repoze.catalog.indexes.field import CatalogFieldIndex
    from repoze.catalog.indexes.keyword import CatalogKeywordIndex
    from repoze.catalog.indexes.text import CatalogTextIndex

    catalog = Catalog()
    index_factories = (
        ('name', CatalogFieldIndex),
        ('title', CatalogFieldIndex),
        ('text', CatalogTextIndex),
        ('allowed', CatalogKeywordIndex),
    )
    for index_name, factory in index_factories:
        catalog[index_name] = factory(index_name)

    # (docid, positional arguments for Content); doc 4 has no title.
    fixtures = (
        (1, ('name1', 'title1', 'body one', ['a'])),
        (2, ('name2', 'title2', 'body two', ['b'])),
        (3, ('name3', 'title3', 'body three', ['c'])),
        (4, ('name4', None, 'body four', ['a', 'b'])),
        (5, ('name5', 'title5', 'body five', ['a', 'b', 'c'])),
        (6, ('name6', 'title6', 'body six', ['d'])),
    )
    for docid, fields in fixtures:
        catalog.index_doc(docid, Content(*fields))

    hit_count, hits = catalog.query(
        self.query,
        sort_index='name',
        limit=5,
        names={'body': 'body'},
    )
    self.assertEqual(hit_count, 2)
    self.assertEqual(list(hits), [4, 5])
def test_it(self):
    # Same scenario for every query flavour: six documents go into a
    # catalog with name/title/text/allowed indexes, and self.query is
    # expected to match exactly documents 4 and 5.
    from repoze.catalog.catalog import Catalog
    from repoze.catalog.indexes.field import CatalogFieldIndex
    from repoze.catalog.indexes.keyword import CatalogKeywordIndex
    from repoze.catalog.indexes.text import CatalogTextIndex

    catalog = Catalog()
    for field_name in ('name', 'title'):
        catalog[field_name] = CatalogFieldIndex(field_name)
    catalog['text'] = CatalogTextIndex('text')
    catalog['allowed'] = CatalogKeywordIndex('allowed')

    # Row position + 1 is the docid the document is indexed under.
    rows = [
        ('name1', 'title1', 'body one', ['a']),
        ('name2', 'title2', 'body two', ['b']),
        ('name3', 'title3', 'body three', ['c']),
        ('name4', None, 'body four', ['a', 'b']),
        ('name5', 'title5', 'body five', ['a', 'b', 'c']),
        ('name6', 'title6', 'body six', ['d']),
    ]
    for docid, row in enumerate(rows, 1):
        catalog.index_doc(docid, Content(*row))

    query_options = dict(sort_index='name', limit=5, names=dict(body='body'))
    total, matched = catalog.query(self.query, **query_options)
    self.assertEqual(total, 2)
    self.assertEqual(list(matched), [4, 5])
class GraphDB(Persistent):
    """Graph store holding nodes, typed directed edges and two catalogs.

    Storage layout (all created in ``_init``):

    * ``nodes``: node id -> dict of node attributes
    * ``edges``: edge id -> ``[start node id, end node id, type id]``
    * ``edgedata``: edge id -> dict of edge attributes (only for edges
      that have any)
    * ``outgoing`` / ``incoming``: type id -> node id ->
      ``{edge id: node id at the other end}``
    * ``typeids``: object whose attributes map a type name to its type id

    Only edges that carry attributes are put into ``edge_catalog``.
    """

    def __init__(self):
        self._init()
        self.node_catalog = Catalog()
        self.edge_catalog = Catalog()

    def _init(self):
        """(Re)create all containers and id counters; catalogs are untouched."""
        self.nodes = IOBTree()
        self.edges = IOBTree()
        self.edgedata = IOBTree()
        self.outgoing = IOBTree()
        self.incoming = IOBTree()
        self.typeids = PObject()
        self._nodeid = Length(0)
        self._edgeid = Length(0)
        self._typeid = Length(0)

    def nodeid(self):
        """Advance the node counter and return the new node id."""
        self._nodeid.change(1)
        return self._nodeid.value

    def edgeid(self):
        """Advance the edge counter and return the new edge id."""
        self._edgeid.change(1)
        return self._edgeid.value

    def typeid(self, name):
        """Return the type id for ``name``, allocating one on first use."""
        if not hasattr(self.typeids, name):
            self._typeid.change(1)
            setattr(self.typeids, name, self._typeid.value)
            self.revtypes[self._typeid.value] = name
        return getattr(self.typeids, name)

    @property
    def revtypes(self):
        """Cached reverse mapping type id -> type name (rebuilt when empty)."""
        if not getattr(self, '_v_revtypes', None):
            # NOTE(review): the original calls dir() twice before reading
            # __dict__, presumably to make the persistent object load its
            # state -- confirm before removing either call.
            dir(self.typeids)
            dir(self.typeids)
            self._v_revtypes = {
                v: k for k, v in self.typeids.__dict__.items()
            }
        return self._v_revtypes

    def getType(self, typeid):
        """Return the type name for a type id, or for an edge (item 2 is its type id)."""
        if not isinstance(typeid, int):
            # lets assume an edge
            typeid = typeid[2]
        return self.revtypes[typeid]

    def addNode(self, **kwargs):
        """Store a node with the given attributes and return its light node.

        An explicit ``_id`` keyword is used as the node id; otherwise a new
        id is taken from the node counter.
        """
        _id = kwargs.pop('_id') if '_id' in kwargs else self.nodeid()
        self.nodes[_id] = kwargs
        ln = self.lightNode(_id, kwargs)
        self.node_catalog.index_doc(_id, ln)
        return ln

    def lightNode(self, _id, node=None):
        "{'_id':nodeid, ...other attributes...}"
        if node is None:
            node = self.nodes[_id]
        out = dict(node)
        out['_id'] = _id
        return out

    def addEdge(self, start, end, edgetype, **kwargs):
        """Create an edge ``start -> end`` of type ``edgetype``; return its light edge.

        ``start`` / ``end`` may be node ids or light-node dicts; ``edgetype``
        may be a type id or a type name. Keyword arguments become the edge's
        attributes, and only edges with attributes are catalogued.
        """
        _id = self.edgeid()
        if not isinstance(edgetype, int):
            edgetype = self.typeid(edgetype)
        if isinstance(start, dict):
            start = start['_id']
        if isinstance(end, dict):
            end = end['_id']

        edge = [start, end, edgetype]
        self.edges[_id] = edge
        if kwargs:
            self.edgedata[_id] = kwargs
        le = self.lightEdge(_id, edge)
        if kwargs:
            self.edge_catalog.index_doc(_id, le)

        # edgeid:nodeid
        # The inner mapping is a plain dict, so it is assigned back after
        # mutation (presumably so the BTree notices the change).
        data = self.outgoing.setdefault(edgetype, IOBTree()).setdefault(start, {})
        data[_id] = end
        self.outgoing[edgetype][start] = data

        data = self.incoming.setdefault(edgetype, IOBTree()).setdefault(end, {})
        data[_id] = start
        self.incoming[edgetype][end] = data
        return le

    def lightEdge(self, _id, edge=None):
        '[sourceid targetid typeid kwargs edgeid]'
        if edge is None:
            edge = self.edges[_id]
        out = list(edge)
        out.append(self.edgedata.get(_id, {}))
        out.append(_id)
        return out

    def delEdge(self, edge):
        """Remove an edge, given as edge id or as light edge."""
        if isinstance(edge, int):
            edge = self.lightEdge(edge)
        start, end, edgetype, props, edgeid = edge

        data = self.outgoing[edgetype][start]
        del data[edgeid]
        self.outgoing[edgetype][start] = data

        data = self.incoming[edgetype][end]
        del data[edgeid]
        self.incoming[edgetype][end] = data

        del self.edges[edgeid]
        if edgeid in self.edgedata:
            # Only edges with attributes were catalogued; drop the catalog
            # entry and the attributes so no stale data is left behind.
            self.edge_catalog.unindex_doc(edgeid)
            del self.edgedata[edgeid]

    def delNode(self, node):
        """Remove a node (id or light node).

        Raises StillConnected if any edge still starts or ends at the node.
        """
        if isinstance(node, int):
            node = self.lightNode(node)
        nodeid = node['_id']

        # First pass: refuse to delete while edges are attached.
        for label, container in (('outgoing', self.outgoing),
                                 ('incoming', self.incoming)):
            for edgetype in container.keys():
                if len(container[edgetype].get(nodeid, {})) > 0:
                    raise StillConnected(label, container[edgetype][nodeid])

        # all good, lets delete
        for container in (self.outgoing, self.incoming):
            for edgetype in container.keys():
                if nodeid in container[edgetype]:
                    del container[edgetype][nodeid]

        self.node_catalog.unindex_doc(nodeid)
        del self.nodes[nodeid]

    def updateNode(self, lightnode):
        """Replace the stored attributes of a node and reindex it."""
        nodeid = lightnode['_id']
        data = dict(lightnode)
        self.nodes[nodeid] = data
        self.node_catalog.reindex_doc(nodeid, lightnode)

    def updateEdge(self, lightedge):
        """Store the contents of a light edge back and refresh the catalog."""
        edgeid = lightedge[4]
        # Stored edges are [start, end, typeid]; the attributes (item 3) live
        # in edgedata, otherwise lightEdge() would return six items.
        edge = list(lightedge[:3])
        data = lightedge[3]
        self.edges[edgeid] = edge
        if data:
            self.edgedata[edgeid] = data
            self.edge_catalog.reindex_doc(edgeid, lightedge)
        elif edgeid in self.edgedata:
            del self.edgedata[edgeid]
            self.edge_catalog.unindex_doc(edgeid)

    def kwQuery(self, **kwargs):
        """AND together one ``Eq`` query per keyword.

        Raises IndexError when called without keywords.
        """
        kwitems = list(kwargs.items())
        key, value = kwitems[0]
        query = rc_query.Eq(key, value)
        for k, v in kwitems[1:]:
            query = query & rc_query.Eq(k, v)
        return query

    def queryNode(self, **kwargs):
        """Return the light nodes whose indexed attributes equal the keywords."""
        result = self.node_catalog.query(self.kwQuery(**kwargs))
        return [self.lightNode(i) for i in result[1]]

    def queryEdge(self, **kwargs):
        """Return the light edges whose indexed attributes equal the keywords."""
        result = self.edge_catalog.query(self.kwQuery(**kwargs))
        return [self.lightEdge(i) for i in result[1]]

    def prepareTypes(self, types=None):
        """Normalise ``types`` to a list of type ids; ``None`` stays ``None``."""
        if types is None:
            return types
        if type(types) not in (list, tuple):
            types = [types]
        return [self.typeid(t) if isinstance(t, str) else t for t in types]

    ################## Higher Level API, functionality > speed ###################

    def getAllEdges(self, nodeids, directions=None, types=None, returnIds=0):
        """Collect the edges attached to the given nodes.

        ``directions`` entries starting with ``'i'`` select incoming edges,
        entries starting with ``'o'`` outgoing ones (default: both).
        ``types`` optionally restricts the edge types. With ``returnIds``
        edge ids are collected instead of ``Edge`` objects.

        Returns a plain list when exactly one direction was requested,
        otherwise an ``EdgeDict`` keyed by the requested direction.
        Raises ValueError for an unknown direction.
        """
        if not islisttype(nodeids):
            nodeids = [nodeids]
        if directions is None:
            directions = ['i', 'o']
        elif type(directions) not in (list, tuple):
            directions = [directions]
        types = self.prepareTypes(types)
        nodeids = [n if isinstance(n, int) else n['_id'] for n in nodeids]

        out = EdgeDict()
        for direction in directions:
            if direction.startswith('i'):
                d = 'incoming'
            elif direction.startswith('o'):
                d = 'outgoing'
            else:
                # A string cannot be raised; use a real exception.
                raise ValueError('unknown direction: %r' % (direction,))
            result = []
            container = getattr(self, d)
            for edgetype in container.keys():
                if types is not None and edgetype not in types:
                    continue
                for n in nodeids:
                    edges = container[edgetype].get(n, {})
                    if returnIds:
                        result.extend(edges.keys())
                    else:
                        result.extend(self.edge(key) for key in list(edges.keys()))
            out[direction] = result

        if len(directions) == 1:
            return result
        return out

    # XXX work in progress
    def getEdges(self, start, end, edgetype):
        """Return the light edges of ``edgetype`` going from ``start`` to ``end``."""
        if not isinstance(edgetype, int):
            edgetype = self.typeid(edgetype)
        if not isinstance(start, int):
            start = start['_id']
        if not isinstance(end, int):
            end = end['_id']
        targets = self.outgoing.get(edgetype, {}).get(start, {})
        return [
            self.lightEdge(edgeid)
            for edgeid, nodeid in list(targets.items())
            if nodeid == end
        ]

    # XXX work in progress
    def addUniqueEdge(self, start, end, edgetype, **kwargs):
        """Add the edge unless one of that type already links start to end.

        Returns the new light edge, or None when an edge already existed.
        """
        if not self.getEdges(start, end, edgetype):
            return self.addEdge(start, end, edgetype, **kwargs)

    def clean(self):
        """Delete every edge, then every node, then reset all containers."""
        # Keys are copied first because deleting mutates the trees.
        for k in list(self.edges.keys()):
            self.delEdge(k)
        for k in list(self.nodes.keys()):
            self.delNode(k)
        self._init()

    def render(self, filename='graphagus', label='name', source=False):
        """Build a graphviz Digraph of the whole graph.

        Each node is labelled with its ``label`` attribute, each edge with
        its type name. Returns the dot source when ``source`` is true;
        otherwise calls ``dot.render(filename, cleanup=True)``.
        """
        from graphviz import Digraph
        dot = Digraph('Graphagus dump', format='svg')
        for k in self.nodes.keys():
            n = self.lightNode(k)
            dot.node(str(k), n[label])
        for k in self.edges.keys():
            e = self.lightEdge(k)
            dot.edge(str(e[0]), str(e[1]), self.getType(e))
        if source:
            return dot.source
        dot.render(filename, cleanup=True)

    def edge(self, lightEdge):
        """Wrap an edge id or light edge in an ``Edge`` object."""
        if isinstance(lightEdge, int):
            lightEdge = self.lightEdge(lightEdge)
        return Edge(self, lightEdge)

    def node(self, lightNode):
        """Wrap a node id or light node in a ``Node`` object."""
        if isinstance(lightNode, int):
            lightNode = self.lightNode(lightNode)
        return Node(self, lightNode)
class Table(ZopeDict):
    """
    A table is a combination of a ZopeDict and a RepozeCatalog
    with integer keys and object values

    Keys are passed through ``int()`` on every access, so string keys such
    as ``"3"`` address the same entry as ``3``.
    """
    # TODO write my own IOBTree/Length combination instead of using ZopeDict?

    def __init__(self, parent, indices=None):
        """Register the table in ``parent`` and create one field index per name.

        ``indices`` is an optional sequence of index names; the first one
        becomes the current index. ``None`` or an empty sequence creates no
        indexes and leaves the current index name empty.
        """
        ZopeDict.__init__(self)
        self.__parent__ = parent
        # NOTE(review): __name__ is not set here; presumably subclasses
        # provide it -- confirm.
        parent[self.__name__] = self
        self._cat = Catalog()
        if not indices:
            # Covers None and an empty sequence (indices[0] would fail).
            self._currentIndex = ""
        else:
            self._currentIndex = indices[0]
            for index in indices:
                self._cat[index] = CatalogFieldIndex(index)

    def getCurrentIndex(self):
        """Return the catalog index registered under the current index name."""
        # FIXME current index should be part of the user preferences
        return self._cat[self._currentIndex]

    def delete(self, names):
        """Not implemented yet; only prints a reminder."""
        # Function-call form works as a statement on Python 2 as well.
        print("TODO delete")

    def getFromIndex(self, index, target):
        """Return the truthy stored values whose ``index`` equals ``target``."""
        # TODO return a generator instead of a list
        result = self._cat.query(Eq(index, target))
        candidates = (self.get(key) for key in result[1])
        return [value for value in candidates if value]

    def maxKey(self):
        """Return the largest key, or 0 for an empty table."""
        return self._data.maxKey() if len(self) else 0

    # NB I have to cast key to an int for traversal to work
    # FIXME: it seems like this is the wrong place for this maybe it
    # is a sign I should give up on traversal altogether?
    def __getitem__(self, key):
        return self._data[int(key)]

    def setdefault(self, key, failobj=None):
        return ZopeDict.setdefault(self, int(key), failobj)

    def has_key(self, key):
        return ZopeDict.has_key(self, int(key))

    def get(self, key, failobj=None):
        return ZopeDict.get(self, int(key), failobj)

    def __contains__(self, key):
        return ZopeDict.__contains__(self, int(key))

    # zc.dict interface
    # Addition is done with __setitem__, overriding it will control addition.
    def __setitem__(self, key, value):
        # Values are not indexed on assignment; call reindex() explicitly.
        #self._cat.index_doc(int(key), value)
        ZopeDict.__setitem__(self, int(key), value)

    #TODO find a way to do efficient automatic re-indexing
    # can't see that I can do better than Plone though
    # http://developer.plone.org/searching_and_indexing/indexing.html#when-indexing-happens-and-how-to-reindex-manually
    def reindex(self, value):
        """(Re)index ``value`` in the catalog under ``value.key``."""
        self._cat.index_doc(value.key, value)

    # Removal is done with either pop or clear, overriding these methods will
    # control removal.
    def pop(self, key, *args):
        """Remove and return the entry for ``key``, then unindex it."""
        key = int(key)
        retval = ZopeDict.pop(self, key, *args)
        self._cat.unindex_doc(key)
        return retval

    def clear(self):
        """Remove all entries and clear the catalog."""
        ZopeDict.clear(self)
        self._cat.clear()
class Catalog(object):
    """Wrapper pairing a RepozeCatalog with a DocumentMap.

    Objects are registered in the document map by their ``uuid``; the
    object's path is kept as document-map metadata so that a docid can be
    resolved back to the object.
    """

    def __init__(self):
        self._catalog = RepozeCatalog()
        self._document_map = DocumentMap()

    @property
    def catalog(self):
        """ convenient proxy to real catalog """
        return self._catalog

    def _mark_changed(self):
        """Set ``_p_changed`` on self, the catalog, the document map and the parent."""
        self._p_changed = 1
        self.catalog._p_changed = 1
        self._document_map._p_changed = 1
        self.__parent__._p_changed = 1

    def query(self, qry, as_summary=False, as_object=False, **kwargs):
        """ Query the catalog. If as_summary is set, return object
        summaries, as fetched from info from the indexes. If as_object is
        set, return the resolved objects. Otherwise return the raw catalog
        result. Extra keyword arguments go to the underlying query."""
        res = self._catalog.query(qry, **kwargs)
        if as_summary:
            return [self.get_object_summary(docid) for docid in res[1]]
        if as_object:
            return [self.get_object(docid) for docid in res[1]]
        return res

    def index_object(self, object):
        """Register ``object`` in the document map and index it.

        Failures while indexing are logged, not raised.
        """
        path = object_to_path(object)
        uuid = object.uuid
        docid = self._document_map.add(uuid)
        self._document_map.add_metadata(docid, {'path': path})
        try:
            self.catalog.index_doc(docid, object)
            self._mark_changed()
        except Exception:
            # Best effort by design, but let KeyboardInterrupt/SystemExit
            # propagate instead of swallowing them with a bare except.
            LOGGER.exception("Could not index object!")

    def reindex_object(self, object):
        """Reindex ``object``, indexing it first if it is not yet known.

        Failures while reindexing are logged, not raised.
        """
        uuid = object.uuid
        docid = self._document_map.docid_for_address(uuid)
        if docid is None:
            # Unknown object: a fresh index already records path and values.
            self.index_object(object)
            return

        # update the path of the object in the documentmap since the
        # object might have been renamed / moved
        path = object_to_path(object)
        self._document_map.add_metadata(docid, {'path': path})
        try:
            self.catalog.reindex_doc(docid, object)
            self._mark_changed()
        except Exception:
            LOGGER.exception("Could not index object!")

    def unindex_object(self, object):
        """Remove ``object`` from the catalog and the document map, if known."""
        uuid = object.uuid
        docid = self._document_map.docid_for_address(uuid)
        # Compare with None: 0 is a legal docid but is falsy.
        if docid is not None:
            self.catalog.unindex_doc(docid)
            self._document_map.remove_docid(docid)
            self._mark_changed()

    def clear(self):
        """Empty the catalog and start over with a fresh document map."""
        self._catalog.clear()
        self._document_map = DocumentMap()
        self._mark_changed()

    def get_object(self, docid):
        """Resolve ``docid`` to its object via the path stored as metadata."""
        metadata = self._document_map.get_metadata(docid)
        path = metadata['path']
        return path_to_object(path, self.__parent__)

    def get_object_summary(self, uuid):
        """ Return a summary of the found object, based on the values that
        the indexes hold on the given uuid. Indexes without a
        ``_rev_index`` attribute are skipped; missing values become ''."""
        summ = {}
        for key in self.catalog.keys():
            idx = self.catalog[key]
            if hasattr(idx, "_rev_index"):
                summ[key] = idx._rev_index.get(uuid, '')
        summ['key'] = uuid
        return ObjectSummary(summ)

    def list_objects(self):
        """Yield ``(docid, path)`` for every document in the document map."""
        for docid in self.list_object_ids():
            metadata = self._document_map.get_metadata(docid)
            yield (docid, metadata['path'])

    def list_object_ids(self):
        """Return the docids known to the document map."""
        return self._document_map.docid_to_address.keys()
class GraphDB(Persistent):
    """Persistent graph of attribute-carrying nodes and typed, directed edges.

    ``nodes`` maps node id -> attribute dict, ``edges`` maps edge id ->
    ``[start, end, typeid]`` and ``edgedata`` maps edge id -> attribute
    dict for edges that have attributes. ``outgoing`` and ``incoming`` are
    nested as type id -> node id -> ``{edge id: other node id}``.
    Edges are only catalogued when they carry attributes.
    """

    def __init__(self):
        self._init()
        self.node_catalog = Catalog()
        self.edge_catalog = Catalog()

    def _init(self):
        """Create empty containers and zeroed id counters."""
        self.nodes = IOBTree()
        self.edges = IOBTree()
        self.edgedata = IOBTree()
        self.outgoing = IOBTree()
        self.incoming = IOBTree()
        self.typeids = PObject()
        self._nodeid = Length(0)
        self._edgeid = Length(0)
        self._typeid = Length(0)

    def nodeid(self):
        """Return the next node id."""
        self._nodeid.change(1)
        return self._nodeid.value

    def edgeid(self):
        """Return the next edge id."""
        self._edgeid.change(1)
        return self._edgeid.value

    def typeid(self, name):
        """Return the id of edge type ``name``, registering it if unknown."""
        if not hasattr(self.typeids, name):
            self._typeid.change(1)
            setattr(self.typeids, name, self._typeid.value)
            self.revtypes[self._typeid.value] = name
        return getattr(self.typeids, name)

    @property
    def revtypes(self):
        """Reverse mapping type id -> type name, built once and cached."""
        if not hasattr(self, '_v_revtypes'):
            # NOTE(review): dir() is called twice in the original before
            # __dict__ is read, presumably to load the object's state --
            # confirm before simplifying.
            dir(self.typeids)
            dir(self.typeids)
            self._v_revtypes = dict(
                (v, k) for k, v in self.typeids.__dict__.items())
        return self._v_revtypes

    def getType(self, typeid):
        """Return the type name for a type id or for an edge."""
        if not isinstance(typeid, int):
            # lets assume an edge
            typeid = typeid[2]
        return self.revtypes[typeid]

    def addNode(self, **kwargs):
        """Store a new node with the given attributes; return its light node."""
        _id = self.nodeid()
        self.nodes[_id] = kwargs
        ln = self.lightNode(_id, kwargs)
        self.node_catalog.index_doc(_id, ln)
        return ln

    def lightNode(self, _id, node=None):
        "{'_id':nodeid, ...other attributes...}"
        if node is None:
            node = self.nodes[_id]
        out = dict(node)
        out['_id'] = _id
        return out

    def addEdge(self, start, end, edgetype, **kwargs):
        """Add an edge from ``start`` to ``end`` and return its light edge.

        Nodes may be given as ids or light-node dicts, the type as id or
        name. Keyword arguments are stored as edge attributes; only edges
        with attributes are added to the edge catalog.
        """
        _id = self.edgeid()

        if not isinstance(edgetype, int):
            edgetype = self.typeid(edgetype)
        if isinstance(start, dict):
            start = start['_id']
        if isinstance(end, dict):
            end = end['_id']

        edge = [start, end, edgetype]
        self.edges[_id] = edge

        if kwargs:
            self.edgedata[_id] = kwargs
            self.edge_catalog.index_doc(_id, self.lightEdge(_id, edge))

        le = self.lightEdge(_id, edge)

        # edgeid:nodeid
        # Plain dicts are written back after mutation (presumably so the
        # surrounding BTree records the change).
        for container, own, other in ((self.outgoing, start, end),
                                      (self.incoming, end, start)):
            data = container.setdefault(edgetype, IOBTree()).setdefault(own, {})
            data[_id] = other
            container[edgetype][own] = data
        return le

    def lightEdge(self, _id, edge=None):
        '[sourceid targetid typeid kwargs edgeid]'
        if edge is None:
            edge = self.edges[_id]
        out = list(edge)
        out.append(self.edgedata.get(_id, {}))
        out.append(_id)
        return out

    def delEdge(self, edge):
        """Delete an edge given by id or as light edge."""
        if isinstance(edge, int):
            edge = self.lightEdge(edge)
        start, end, edgetype, props, edgeid = edge

        for container, own in ((self.outgoing, start), (self.incoming, end)):
            data = container[edgetype][own]
            del data[edgeid]
            container[edgetype][own] = data

        del self.edges[edgeid]
        if edgeid in self.edgedata:
            # Edges with attributes are the only catalogued ones; remove
            # the catalog entry together with the attributes themselves.
            self.edge_catalog.unindex_doc(edgeid)
            del self.edgedata[edgeid]

    def delNode(self, node):
        """Delete a node given by id or as light node.

        Raises StillConnected while edges are still attached to it.
        """
        if isinstance(node, int):
            node = self.lightNode(node)
        nodeid = node['_id']

        for edgetype in self.outgoing.keys():
            if len(self.outgoing[edgetype].get(nodeid, {})) > 0:
                raise StillConnected('outgoing', self.outgoing[edgetype][nodeid])
        for edgetype in self.incoming.keys():
            if len(self.incoming[edgetype].get(nodeid, {})) > 0:
                raise StillConnected('incoming', self.incoming[edgetype][nodeid])

        # all good, lets delete
        for edgetype in self.outgoing.keys():
            if nodeid in self.outgoing[edgetype]:
                del self.outgoing[edgetype][nodeid]
        for edgetype in self.incoming.keys():
            if nodeid in self.incoming[edgetype]:
                del self.incoming[edgetype][nodeid]

        self.node_catalog.unindex_doc(nodeid)
        del self.nodes[nodeid]

    def updateNode(self, lightnode):
        """Overwrite a node's stored attributes and reindex it."""
        nodeid = lightnode['_id']
        data = dict(lightnode)
        self.nodes[nodeid] = data
        self.node_catalog.reindex_doc(nodeid, lightnode)

    def updateEdge(self, lightedge):
        """Write a light edge back to storage and update the catalog."""
        edgeid = lightedge[4]
        # Only [start, end, typeid] belong in self.edges; keeping item 3
        # (the attributes) there would make lightEdge() return six items.
        edge = list(lightedge[:3])
        data = lightedge[3]
        self.edges[edgeid] = edge
        if data:
            self.edgedata[edgeid] = data
            self.edge_catalog.reindex_doc(edgeid, lightedge)
        elif edgeid in self.edgedata:
            del self.edgedata[edgeid]
            self.edge_catalog.unindex_doc(edgeid)

    def kwQuery(self, **kwargs):
        """Combine one ``Eq`` per keyword with ``&``.

        Raises IndexError when no keyword is given.
        """
        # list() so that indexing and slicing also work on dict views.
        kwitems = list(kwargs.items())
        key, value = kwitems[0]
        query = rc_query.Eq(key, value)
        for k, v in kwitems[1:]:
            query = query & rc_query.Eq(k, v)
        return query

    def queryNode(self, **kwargs):
        """Return light nodes matching all keyword equalities."""
        result = self.node_catalog.query(self.kwQuery(**kwargs))
        return [self.lightNode(i) for i in result[1]]

    def queryEdge(self, **kwargs):
        """Return light edges matching all keyword equalities."""
        result = self.edge_catalog.query(self.kwQuery(**kwargs))
        return [self.lightEdge(i) for i in result[1]]

    ################## Higher Level API, functionality > speed ###################

    def getAllEdges(self, nodeids, directions=None, types=None):
        """Return ``Edge`` objects attached to the given node(s).

        ``directions`` entries starting with ``'i'`` mean incoming, with
        ``'o'`` outgoing (default both). ``types`` may hold type ids or
        names. A single requested direction yields a list, several yield
        an ``EdgeDict`` keyed by direction. Raises ValueError for an
        unknown direction.
        """
        if type(nodeids) not in (list, tuple):
            nodeids = [nodeids]
        if directions is None:
            directions = ['i', 'o']
        elif type(directions) not in (list, tuple):
            directions = [directions]

        if types is not None:
            if type(types) not in (list, tuple):
                types = [types]
            types = [self.typeid(t) if isinstance(t, str) else t for t in types]

        nodeids = [n if isinstance(n, int) else n['_id'] for n in nodeids]

        out = EdgeDict()
        for direction in directions:
            if direction.startswith('i'):
                d = 'incoming'
            elif direction.startswith('o'):
                d = 'outgoing'
            else:
                # Raising a bare string is not a valid exception.
                raise ValueError('unknown direction: %r' % (direction,))

            result = []
            container = getattr(self, d)
            for edgetype in container.keys():
                if types is not None and edgetype not in types:
                    continue
                for n in nodeids:
                    edges = container[edgetype].get(n, {})
                    for key in list(edges.keys()):
                        result.append(self.edge(key))
            out[direction] = result

        if len(directions) == 1:
            return result
        return out

    # XXX work in progress
    def getEdges(self, start, end, edgetype):
        """Return light edges of ``edgetype`` that run from ``start`` to ``end``."""
        if not isinstance(edgetype, int):
            edgetype = self.typeid(edgetype)
        if not isinstance(start, int):
            start = start['_id']
        if not isinstance(end, int):
            end = end['_id']

        out = []
        targets = self.outgoing.get(edgetype, {}).get(start, {})
        for edgeid, nodeid in list(targets.items()):
            if nodeid == end:
                out.append(self.lightEdge(edgeid))
        return out

    # XXX work in progress
    def addUniqueEdge(self, start, end, edgetype, **kwargs):
        """Add the edge only if no edge of that type links start to end yet.

        Returns the new light edge, or None if such an edge exists.
        """
        if not self.getEdges(start, end, edgetype):
            return self.addEdge(start, end, edgetype, **kwargs)

    def clean(self):
        """Remove all edges and nodes one by one, then reset the containers."""
        for k in list(self.edges.keys()):
            self.delEdge(k)
        for k in list(self.nodes.keys()):
            self.delNode(k)
        self._init()

    def render(self, filename='graphagus', source=False):
        """Build a graphviz Digraph; nodes are labelled with their 'name'.

        Returns the dot source when ``source`` is true; otherwise calls
        ``dot.render(filename, cleanup=True)``.
        """
        from graphviz import Digraph
        dot = Digraph('Graphagus dump', format='svg')
        for k in self.nodes.keys():
            n = self.lightNode(k)
            dot.node(str(k), n['name'])
        for k in self.edges.keys():
            e = self.lightEdge(k)
            dot.edge(str(e[0]), str(e[1]), self.getType(e))
        if source:
            return dot.source
        dot.render(filename, cleanup=True)

    def edge(self, lightEdge):
        """Return an ``Edge`` for an edge id or light edge."""
        if isinstance(lightEdge, int):
            lightEdge = self.lightEdge(lightEdge)
        return Edge(self, lightEdge)

    def node(self, lightNode):
        """Return a ``Node`` for a node id or light node."""
        if isinstance(lightNode, int):
            lightNode = self.lightNode(lightNode)
        return Node(self, lightNode)
class Catalog(object):
    """Set of indexes plus a document map from docids to resource paths.

    A document's docid is stored on the document itself under the
    attribute named by ``docid_attr``.
    """

    def __init__(self, home, name, docid_attr='docid'):
        self.indexes = Indexes()
        self.home = home
        self.name = name
        self.docid_attr = docid_attr
        self.document_map = DocumentMap()

    def index_doc(self, doc):
        """Index ``doc``, assigning it a docid first if it has none."""
        return self.indexes.index_doc(self._get_docid(doc), doc)

    def reindex_doc(self, doc):
        """Reindex ``doc`` under its (possibly newly assigned) docid."""
        return self.indexes.reindex_doc(self._get_docid(doc), doc)

    def unindex_doc(self, doc_or_docid):
        """Remove a document, given as docid or as document, from the catalog.

        A document without a docid attribute is silently ignored (returns
        None).
        """
        # numbers.Integral covers int on Python 3 and int/long on Python 2;
        # the bare name ``long`` does not exist on Python 3.
        import numbers

        if isinstance(doc_or_docid, numbers.Integral):
            docid = doc_or_docid
        else:
            docid = getattr(doc_or_docid, self.docid_attr, None)
            if docid is None:
                return
        self.document_map.remove_docid(docid)
        return self.indexes.unindex_doc(docid)

    def query(self, queryobject, sort_index=None, limit=None, sort_type=None,
              reverse=False, names=None):
        """Run a query; return ``(count, docids, resolver)``.

        ``resolver`` is a callable turning a docid into its resource.
        """
        count, docids = self.indexes.query(
            queryobject,
            sort_index=sort_index,
            limit=limit,
            sort_type=sort_type,
            reverse=reverse,
            names=names)
        return count, docids, self.resolver()

    def _get_docid(self, doc):
        """Return the docid of ``doc``, keeping the document map in sync.

        A document without docid gets a new one, which is stored on the
        document. For a known document whose path changed, the old address
        is replaced by the new path.
        """
        docid_attr = self.docid_attr
        path = resource_path(doc)
        document_map = self.document_map
        docid = getattr(doc, docid_attr, None)
        if docid is None:
            docid = document_map.add(path)
            setattr(doc, docid_attr, docid)
        else:
            old_path = document_map.address_for_docid(docid)
            if old_path != path:
                document_map.remove_address(old_path)
                document_map.add(path, docid)
        return docid

    def resolver(self):
        """Return a function mapping a docid to the resource below ``home['content']``."""
        root = self.home['content']
        document_map = self.document_map

        def resolve(docid):
            path = document_map.address_for_docid(docid)
            return find_resource(root, path)

        return resolve

    def add_index(self, name, index):
        """
        Add an index to an existing catalog.

        Every document already in the document map is indexed into the
        new index.
        """
        # Logger arguments are passed lazily; the messages are unchanged.
        log.info('Adding index: %s', name)
        self.indexes[name] = index
        resolver = self.resolver()
        for docid in self.document_map.docid_to_address.keys():
            doc = resolver(docid)
            log.info('Calculating index for %s', resource_path(doc))
            index.index_doc(docid, doc)