def assignWordId(self, word):
    """Assign a new word id to the provided word and return it.

    If ``word`` is already present in ``self._lexicon`` its existing id
    is returned and nothing is modified.  Otherwise ids are drawn from
    ``randid()`` until ``self._inverseLex.insert`` accepts one, the word
    is stored in ``self._lexicon`` under that id, and the id is returned.
    """
    # First make sure it's not already in there.
    if self._lexicon.has_key(word):
        return self._lexicon[word]

    try:
        inverse = self._inverseLex
    except AttributeError:
        # Old lexicon without an inverse (wid -> word) mapping: build one
        # from the forward mapping.  The loop variables are deliberately
        # NOT named ``word``/``wid`` so the caller's argument survives
        # the rebuild.
        inverse = self._inverseLex = IOBTree()
        for known_word, known_wid in self._lexicon.items():
            inverse[known_wid] = known_word

    # Keep drawing ids while insert() reports failure (presumably because
    # the id is already taken -- confirm against the BTrees docs).
    wid = randid()
    while not inverse.insert(wid, word):
        wid = randid()

    # Plain strings are interned before being used as lexicon keys.
    if isinstance(word, StringType):
        self._lexicon[intern(word)] = wid
    else:
        self._lexicon[word] = wid

    return wid
def assignWordId(self, word):
    """Assign a new word id to the provided word and return it.

    If ``word`` is already in ``self._lexicon`` its existing id is
    returned and nothing is modified.  Otherwise an id is allocated,
    recorded in both ``self._inverseLex`` (id -> word) and
    ``self._lexicon`` (word -> id), and added to the ``self._digrams``
    entry of every digram produced by ``self.createDigrams(word)``.
    """
    # Double check it's not in the lexicon already, and if it is, just
    # return it.
    if self._lexicon.has_key(word):
        return self._lexicon[word]

    # Get word id.  BBB Backward compat pain.
    inverse = self._inverseLex
    try:
        insert = inverse.insert
    except AttributeError:
        # We have an "old" BTree object (no insert method): ids are
        # handed out sequentially.
        if inverse:
            wid = inverse.keys()[-1] + 1
        else:
            # Replace the empty old tree with an IOBTree.  The local name
            # must be rebound too, otherwise the assignment below would
            # land in the discarded object and the mapping would be lost.
            inverse = self._inverseLex = IOBTree()
            wid = 1
        inverse[wid] = word
    else:
        # We have a "new" IOBTree object: draw ids from randid() while
        # insert() reports failure.
        wid = randid()
        while not insert(wid, word):
            wid = randid()

    self._lexicon[word] = wid

    # Now take all the digrams and insert them into the digram map.
    for digram in self.createDigrams(word):
        wids = self._digrams.get(digram, None)
        if wids is None:
            self._digrams[digram] = wids = IISet()
        wids.insert(wid)

    return wid
def assignWordId(self, word):
    """Assign a new word id to the provided word and return it.

    If ``word`` is already in ``self._lexicon`` its existing id is
    returned and nothing is modified.  Otherwise an id is allocated,
    recorded in both ``self._inverseLex`` (id -> word) and
    ``self._lexicon`` (word -> id), and added to the ``self._digrams``
    entry of every digram produced by ``self.createDigrams(word)``.
    """
    # Double check it's not in the lexicon already, and if it is, just
    # return it.
    if self._lexicon.has_key(word):
        return self._lexicon[word]

    # Get word id.  BBB Backward compat pain.
    inverse = self._inverseLex
    try:
        insert = inverse.insert
    except AttributeError:
        # We have an "old" BTree object (no insert method): ids are
        # handed out sequentially.
        if inverse:
            wid = inverse.keys()[-1] + 1
        else:
            # Swap the empty old tree for an IOBTree and rebind the local
            # as well; writing through the stale reference would store
            # the first word in an object nobody keeps.
            inverse = self._inverseLex = IOBTree()
            wid = 1
        inverse[wid] = word
    else:
        # We have a "new" IOBTree object: draw ids from randid() while
        # insert() reports failure.
        wid = randid()
        while not insert(wid, word):
            wid = randid()

    self._lexicon[word] = wid

    # Now take all the digrams and insert them into the digram map.
    for digram in self.createDigrams(word):
        wids = self._digrams.get(digram, None)
        if wids is None:
            self._digrams[digram] = wids = IISet()
        wids.insert(wid)

    return wid
def catalogObject(self, object, uid, threshold=None, idxs=None):
    """Add an object to the Catalog by applying it to the indexes.

    'object' is the object to be cataloged.

    'uid' is the unique Catalog identifier for this object.

    'threshold' is passed through unchanged to each index's
    ``index_object`` call.

    'idxs' optionally names the indexes to update; when it is omitted,
    None, or an empty list, every index in ``self.indexes`` is used.

    Returns the sum of the values returned by the ``index_object``
    calls.
    """
    data = self.data

    # meta_data is stored as a tuple for efficiency
    newDataRecord = self.recordify(object)

    index = self.uids.get(uid, None)
    if index is not None:
        # Old data: update the meta-data only if it actually changed.
        if data.get(index, 0) != newDataRecord:
            data[index] = newDataRecord
    else:
        # New data: allocate a record id first.
        if type(data) is IOBTree:
            # New style, get random id.
            #
            # We want ids to be somewhat random, but there are
            # advantages for having some ids generated
            # sequentially when many catalog updates are done at
            # once, such as when reindexing or bulk indexing.
            # We allocate ids sequentially using a volatile base,
            # so different threads get different bases.  This
            # further reduces conflict and reduces churn in
            # here and in result sets when bulk indexing.
            index = getattr(self, '_v_nextid', 0)
            if index % 4000 == 0:
                index = randid()
            while not data.insert(index, newDataRecord):
                index = randid()
            self._v_nextid = index + 1
        else:
            if data:
                # find the next available unique id
                index = data.keys()[-1] + 1
            else:
                index = 0
            data[index] = newDataRecord

        try:
            self.__len__.change(1)
        except AttributeError:
            pass  # No managed length (old-style)

        self.uids[uid] = index
        self.paths[index] = uid

    # None replaces the former mutable ``[]`` default; an explicit empty
    # list keeps its original "use every index" meaning.
    if idxs is None or idxs == []:
        use_indexes = self.indexes.keys()
    else:
        use_indexes = idxs

    total = 0
    for name in use_indexes:
        x = self.getIndex(name)
        if hasattr(x, 'index_object'):
            total = total + x.index_object(index, object, threshold)
        else:
            LOG('Catalog', ERROR, ('catalogObject was passed '
                                   'bad index object %s.' % str(x)))

    return total