Пример #1
0
    def group(self, seq):
        sortIndex = self._sortIndex
        sortReverse = self._sortReverse
        ns = len(seq)
        ni = len(sortIndex)
        if ns >= 0.1 * ni:
            # result large compared to index -- sort via index
            handled = IISet()
            hn = 0
            _load = getattr(sortIndex, '_load', None)
            if _load is None:
                # not an optimized index
                items = sortIndex.items()

                _load = lambda (x1, x2): x2
                if sortReverse: items.reverse()
            elif sortReverse:
                gRO = getattr(sortIndex, 'getReverseOrder', None)
                items = gRO and gRO()
                if items is None:
                    items = list(sortIndex._index.keys())
                    items.reverse()
            else:
                items = sortIndex._index.keys()
            for i in items:
                ids = intersection(seq, _load(i))
                if ids:
                    handled.update(ids)
                    hn += len(ids)
                    yield i, ids
            if hn != len(seq): yield None, difference(seq, handled)
        else:
            # result relatively small -- sort via result
            m = OOBTree()
            keyFor = getattr(sortIndex, 'keyForDocument', None)
            # work around "nogopip" bug: it defines "keyForDocument" as an integer
            if not callable(keyFor):
                # this will fail, when the index neither defines a reasonable
                # "keyForDocument" nor "documentToKeyMap". In this case,
                # the index cannot be used for sorting.
                keyFor = lambda doc, map=sortIndex.documentToKeyMap(): map[doc]
            noValue = IITreeSet()
            for doc in seq.keys():
                try:
                    k = keyFor(doc)
                except KeyError:
                    noValue.insert(doc)
                    continue
                l = m.get(k)
                if l is None: l = m[k] = IITreeSet()
                l.insert(doc)
            items = m.items()
            if sortReverse:
                items = list(items)
                items.reverse()
            for i in items:
                yield i
            if noValue: yield None, noValue
Пример #2
0
    def getPositions(self, docid, wordid):
        """ return a sequence of positions of occurrences of wordid within
            a document given by its docid.
        """

        encoded_wid = encode((wordid, ))
        encoded_document = self._doc2wid[docid].get()

        positions = IITreeSet()
        for pos, wid in enumerate(decode(encoded_document)):
            if wid == wordid:
                positions.insert(pos)
        return positions
Пример #3
0
    def getPositions(self, docid, wordid):
        """ return a sequence of positions of occurrences of wordid within
            a document given by its docid.
        """

        encoded_wid = encode((wordid,))
        encoded_document = self._doc2wid[docid].get()

        positions = IITreeSet()
        for pos, wid in enumerate(decode(encoded_document)):
            if wid == wordid:
                positions.insert(pos)
        return positions
Пример #4
0
 def group(self, seq):
   sortIndex = self._sortIndex; sortReverse = self._sortReverse
   ns = len(seq); ni = len(sortIndex)
   if ns >= 0.1 * ni:
     # result large compared to index -- sort via index
     handled = IISet(); hn = 0
     _load = getattr(sortIndex, '_load', None)
     if _load is None:
       # not an optimized index
       items = sortIndex.items()
       
       _load = lambda (x1, x2): x2
       if sortReverse: items.reverse()
     elif sortReverse:
       gRO = getattr(sortIndex, 'getReverseOrder', None)
       items = gRO and gRO()
       if items is None:
         items = list(sortIndex._index.keys()); items.reverse()
     else: items = sortIndex._index.keys()
     for i in items:
       ids = intersection(seq, _load(i))
       if ids:
         handled.update(ids); hn += len(ids)
         yield i, ids
     if hn != len(seq): yield None, difference(seq, handled)
   else:
     # result relatively small -- sort via result
     m = OOBTree()
     keyFor = getattr(sortIndex, 'keyForDocument', None)
     # work around "nogopip" bug: it defines "keyForDocument" as an integer
     if not callable(keyFor):
       # this will fail, when the index neither defines a reasonable
       # "keyForDocument" nor "documentToKeyMap". In this case,
       # the index cannot be used for sorting.
       keyFor = lambda doc, map=sortIndex.documentToKeyMap(): map[doc]
     noValue = IITreeSet()
     for doc in seq.keys():
       try: k = keyFor(doc)
       except KeyError: noValue.insert(doc); continue
       l = m.get(k)
       if l is None: l = m[k] = IITreeSet()
       l.insert(doc)
     items = m.items()
     if sortReverse: items = list(items); items.reverse()
     for i in items: yield i
     if noValue: yield None, noValue
Пример #5
0
    def insert(self, params):
        record = self.tuplify(params)

        # Determine the primary key.
        primary_key = []
        for position, column in self.col_info:
            if column.primary:
                if record[position] is None:
                    raise ValueError, (
                        "No value provided for primary key column %s" %
                        repr(column.name))
                primary_key.append(record[position])
        if primary_key:
            primary_key = tuple(primary_key)
            if self.primary_index.has_key(primary_key):
                raise DuplicateError("Primary key %s in use" %
                                     repr(primary_key))

        # Add a record.
        rid = self.next_rid
        self.next_rid += 1  # XXX Hotspot!
        record = (rid, ) + record[1:]
        self.data[rid] = record
        if primary_key:
            self.primary_index[primary_key] = rid

        # Add to indexes.
        for position, column in self.col_info:
            name = column.name
            value = record[position]
            if value is not None:
                if self.indexes.has_key(name):
                    set = self.indexes[name].get(value)
                    if set is None:
                        set = IITreeSet()
                        self.indexes[name][value] = set
                    set.insert(rid)

        # Return the number of rows inserted.
        return 1
Пример #6
0
 def group(self, seq):
   sortIndex = self._sortIndex; sortReverse = self._sortReverse
   ns = len(seq); ni = len(sortIndex)
   if ns >= 0.1 * ni:
     # result large compared to index -- sort via index
     handled = IISet(); hn = 0
     _load = getattr(sortIndex, '_load', None)
     if _load is None:
       # not an optimized index
       items = sortIndex.items()
       
       _load = lambda (x1, x2): x2
       if sortReverse: items.reverse()
     elif sortReverse:
       gRO = getattr(sortIndex, 'getReverseOrder', None)
       items = gRO and gRO()
       if items is None:
         items = list(sortIndex._index.keys()); items.reverse()
     else: items = sortIndex._index.keys()
     for i in items:
       ids = intersection(seq, _load(i))
       if ids:
         handled.update(ids); hn += len(ids)
         yield i, ids
     if hn != len(seq): yield None, difference(seq, handled)
   else:
     # result relatively small -- sort via result
     keyFor = sortIndex.keyForDocument; m = OOBTree()
     noValue = IITreeSet()
     for doc in seq.keys():
       try: k = keyFor(doc)
       except KeyError: noValue.insert(doc); continue
       l = m.get(k)
       if l is None: l = m[k] = IITreeSet()
       l.insert(doc)
     items = m.items()
     if sortReverse: items = list(items); items.reverse()
     for i in items: yield i
     if noValue: yield None, noValue
Пример #7
0
class BooleanIndex(UnIndex):
    """Index for booleans

       self._index = set([documentId1, documentId2])
       self._unindex = {documentId:[True/False]}

       self._length is the length of the unindex
       self._index_length is the length of the index

       False doesn't have actual entries in _index.
    """

    meta_type = "BooleanIndex"

    manage_options = (
        {'label': 'Settings',
         'action': 'manage_main'},
        {'label': 'Browse',
         'action': 'manage_browse'},
    )

    query_options = ["query"]

    manage = manage_main = DTMLFile('dtml/manageBooleanIndex', globals())
    manage_main._setName('manage_main')
    manage_browse = DTMLFile('../dtml/browseIndex', globals())

    _index_value = 1
    _index_length = None

    def clear(self):
        self._index = IITreeSet()
        self._index_length = BTrees.Length.Length()
        self._index_value = 1
        self._unindex = IIBTree()
        self._length = BTrees.Length.Length()
        if self._counter is None:
            self._counter = BTrees.Length.Length()
        else:
            self._increment_counter()

    def histogram(self):
        """Return a mapping which provides a histogram of the number of
        elements found at each point in the index.
        """
        histogram = {}
        indexed = bool(self._index_value)
        histogram[indexed] = self._index_length.value
        histogram[not indexed] = self._length.value - self._index_length.value
        return histogram

    def _invert_index(self, documentId=None):
        self._index_value = indexed = int(not self._index_value)
        self._index.clear()
        length = 0
        for rid, value in self._unindex.iteritems():
            if value == indexed:
                self._index.add(rid)
                length += 1
        # documentId is the rid of the currently processed object that
        # triggered the invert. in the case of unindexing, the rid hasn't
        # been removed from the unindex yet. While indexing, the rid will
        # be added to the index and unindex after this method is done
        if documentId is not None:
            self._index.remove(documentId)
            length -= 1
        self._index_length = BTrees.Length.Length(length)

    def insertForwardIndexEntry(self, entry, documentId):
        """If the value matches the indexed one, insert into treeset
        """
        # When we get the first entry, decide to index the opposite of what
        # we got, as indexing zero items is fewer than one.
        if self._length.value == 0:
            self._index_value = int(not bool(entry))

        # if the added entry value is index value, insert it into index
        if bool(entry) is bool(self._index_value):
            self._index_length.change(1)
            self._index.insert(documentId)

        # insert value into global unindex (before computing index invert)
        self._unindex[documentId] = entry
        self._length.change(1)

        # is the index (after adding the current entry) larger than 60%
        # of the total length? than switch the indexed value
        if bool(entry) is bool(self._index_value):
            if (self._index_length.value) >= ((self._length.value) * 0.6):
                self._invert_index()

    def removeForwardIndexEntry(self, entry, documentId, check=True):
        """Take the entry provided and remove any reference to documentId
        in its entry in the index.
        """
        if bool(entry) is bool(self._index_value):
            try:
                self._index.remove(documentId)
                self._index_length.change(-1)
            except ConflictError:
                raise
            except Exception:
                LOG.exception(
                    '%s: unindex_object could not remove documentId %s '
                    'from index %s. This should not happen.' % (
                        self.__class__.__name__,
                        str(documentId),
                        str(self.id)))
        elif check:
            # is the index (after removing the current entry) larger than
            # 60% of the total length? than switch the indexed value
            if (self._index_length.value) <= ((self._length.value - 1) * 0.6):
                self._invert_index(documentId)
                return

    def _index_object(self, documentId, obj, threshold=None, attr=''):
        """ index and object 'obj' with integer id 'documentId'"""
        returnStatus = 0

        # First we need to see if there's anything interesting to look at
        datum = self._get_object_datum(obj, attr)

        # Make it boolean, int as an optimization
        if datum is not _marker:
            datum = int(bool(datum))

        # We don't want to do anything that we don't have to here, so we'll
        # check to see if the new and existing information is the same.
        oldDatum = self._unindex.get(documentId, _marker)
        if datum != oldDatum:
            if oldDatum is not _marker:
                self.removeForwardIndexEntry(oldDatum, documentId, check=False)
                if datum is _marker:
                    try:
                        del self._unindex[documentId]
                        self._length.change(-1)
                    except ConflictError:
                        raise
                    except Exception:
                        LOG.error('Should not happen: oldDatum was there, now '
                                  'its not, for document with id %s' %
                                  documentId)

            if datum is not _marker:
                self.insertForwardIndexEntry(datum, documentId)

            returnStatus = 1

        return returnStatus

    def unindex_object(self, documentId):
        """ Unindex the object with integer id 'documentId' and don't
        raise an exception if we fail
        """
        unindexRecord = self._unindex.get(documentId, _marker)
        if unindexRecord is _marker:
            return None

        self._increment_counter()

        self.removeForwardIndexEntry(unindexRecord, documentId)

        try:
            del self._unindex[documentId]
            self._length.change(-1)
        except ConflictError:
            raise
        except Exception:
            LOG.debug('Attempt to unindex nonexistent document'
                      ' with id %s' % documentId, exc_info=True)

    def query_index(self, record, resultset=None):
        index = self._index
        indexed = self._index_value

        for key in record.keys:
            if bool(key) is bool(indexed):
                # If we match the indexed value, check index
                return intersection(index, resultset)
            else:
                # Otherwise, remove from resultset or _unindex
                if resultset is None:
                    return union(difference(self._unindex, index), IISet([]))
                else:
                    return difference(resultset, index)
        return IISet()

    def indexSize(self):
        """Return distinct values, as an optimization we always claim 2."""
        return 2

    def items(self):
        # return a list of value to int set of rid tuples
        indexed = self._index_value
        items = [(bool(indexed), self._index)]
        false = IISet()
        for rid, value in self._unindex.iteritems():
            if value != indexed:
                false.add(rid)
        items.append((not bool(indexed), false))
        return items
Пример #8
0
class BooleanIndex(UnIndex):
    """Index for booleans

       self._index = set([documentId1, documentId2])
       self._unindex = {documentId:[True/False]}

       self._length is the length of the unindex
       self._index_length is the length of the index

       False doesn't have actual entries in _index.
    """

    meta_type = "BooleanIndex"

    manage_options = (
        {
            'label': 'Settings',
            'action': 'manage_main'
        },
        {
            'label': 'Browse',
            'action': 'manage_browse'
        },
    )

    query_options = ["query"]

    manage = manage_main = DTMLFile('dtml/manageBooleanIndex', globals())
    manage_main._setName('manage_main')
    manage_browse = DTMLFile('../dtml/browseIndex', globals())

    _index_value = 1
    _index_length = None

    def clear(self):
        self._index = IITreeSet()
        self._index_length = BTrees.Length.Length()
        self._index_value = 1
        self._unindex = IIBTree()
        self._length = BTrees.Length.Length()

    def histogram(self):
        """Return a mapping which provides a histogram of the number of
        elements found at each point in the index.
        """
        histogram = {}
        indexed = bool(self._index_value)
        histogram[indexed] = self._index_length.value
        histogram[not indexed] = self._length.value - self._index_length.value
        return histogram

    def _invert_index(self, documentId=None):
        self._index_value = indexed = int(not self._index_value)
        self._index.clear()
        length = 0
        for rid, value in self._unindex.iteritems():
            if value == indexed:
                self._index.add(rid)
                length += 1
        # documentId is the rid of the currently processed object that
        # triggered the invert. in the case of unindexing, the rid hasn't
        # been removed from the unindex yet. While indexing, the rid will
        # be added to the index and unindex after this method is done
        if documentId is not None:
            self._index.remove(documentId)
            length -= 1
        self._index_length = BTrees.Length.Length(length)

    def _inline_migration(self):
        self._length = BTrees.Length.Length(len(self._unindex.keys()))
        self._index_length = BTrees.Length.Length(len(self._index))
        if self._index_length.value > (self._length.value / 2):
            self._index_value = 1
            self._invert_index()
        else:
            # set an instance variable
            self._index_value = 1

    def insertForwardIndexEntry(self, entry, documentId):
        """If the value matches the indexed one, insert into treeset
        """
        # when we get the first entry, decide to index the opposite of what
        # we got, as indexing zero items is fewer than one
        # BBB inline migration
        if self._index_length is None:
            self._inline_migration()

        if self._length.value == 0:
            self._index_value = int(not bool(entry))

        # if the added entry value is index value, insert it into index
        if bool(entry) is bool(self._index_value):
            self._index_length.change(1)
            self._index.insert(documentId)

        # insert value into global unindex (before computing index invert)
        self._unindex[documentId] = entry
        self._length.change(1)

        # is the index (after adding the current entry) larger than 60%
        # of the total length? than switch the indexed value
        if bool(entry) is bool(self._index_value):
            if (self._index_length.value) >= ((self._length.value) * 0.6):
                self._invert_index()

    def removeForwardIndexEntry(self, entry, documentId, check=True):
        """Take the entry provided and remove any reference to documentId
        in its entry in the index.
        """
        index_length = self._index_length
        if index_length is None:
            self._inline_migration()

        if bool(entry) is bool(self._index_value):
            try:
                self._index.remove(documentId)
                # BBB inline migration
                length = self._index_length
                length.change(-1)
            except ConflictError:
                raise
            except Exception:
                LOG.exception(
                    '%s: unindex_object could not remove '
                    'documentId %s from index %s. This '
                    'should not happen.' %
                    (self.__class__.__name__, str(documentId), str(self.id)))
        elif check:
            length = self._length.value
            index_length = self._index_length.value
            # is the index (after removing the current entry) larger than
            # 60% of the total length? than switch the indexed value
            if (index_length) <= ((length - 1) * 0.6):
                self._invert_index(documentId)
                return

    def _index_object(self, documentId, obj, threshold=None, attr=''):
        """ index and object 'obj' with integer id 'documentId'"""
        returnStatus = 0

        # First we need to see if there's anything interesting to look at
        datum = self._get_object_datum(obj, attr)

        # Make it boolean, int as an optimization
        if datum is not _marker:
            datum = int(bool(datum))

        # We don't want to do anything that we don't have to here, so we'll
        # check to see if the new and existing information is the same.
        oldDatum = self._unindex.get(documentId, _marker)
        if datum != oldDatum:
            if oldDatum is not _marker:
                self.removeForwardIndexEntry(oldDatum, documentId, check=False)
                if datum is _marker:
                    try:
                        del self._unindex[documentId]
                        self._length.change(-1)
                    except ConflictError:
                        raise
                    except Exception:
                        LOG.error('Should not happen: oldDatum was there, now '
                                  'its not, for document with id %s' %
                                  documentId)

            if datum is not _marker:
                self.insertForwardIndexEntry(datum, documentId)

            returnStatus = 1

        return returnStatus

    def unindex_object(self, documentId):
        """ Unindex the object with integer id 'documentId' and don't
        raise an exception if we fail
        """
        unindexRecord = self._unindex.get(documentId, _marker)
        if unindexRecord is _marker:
            return None

        self.removeForwardIndexEntry(unindexRecord, documentId)

        try:
            del self._unindex[documentId]
            self._length.change(-1)
        except ConflictError:
            raise
        except Exception:
            LOG.debug('Attempt to unindex nonexistent document'
                      ' with id %s' % documentId,
                      exc_info=True)

    def _apply_index(self, request, resultset=None):
        record = parseIndexRequest(request, self.id, self.query_options)
        if record.keys is None:
            return None

        index = self._index
        indexed = self._index_value

        for key in record.keys:
            if bool(key) is bool(indexed):
                # If we match the indexed value, check index
                return (intersection(index, resultset), (self.id, ))
            else:
                # Otherwise, remove from resultset or _unindex
                if resultset is None:
                    return (union(difference(self._unindex, index),
                                  IISet([])), (self.id, ))
                else:
                    return (difference(resultset, index), (self.id, ))
        return (IISet(), (self.id, ))

    def indexSize(self):
        """Return distinct values, as an optimization we always claim 2."""
        return 2

    def items(self):
        # return a list of value to int set of rid tuples
        indexed = self._index_value
        items = [(bool(indexed), self._index)]
        false = IISet()
        for rid, value in self._unindex.iteritems():
            if value != indexed:
                false.add(rid)
        items.append((not bool(indexed), false))
        return items
Пример #9
0
class BooleanIndex(UnIndex):
    """Index for booleans

       self._index = set([documentId1, documentId2])
       self._unindex = {documentId:[True/False]}

       self._length is the length of the unindex
       self._index_length is the length of the index
       self._index_value is the indexed value

       The document ids in self._index have self._index_value as their value.

       Since there are only two possible values (True/False), the index
       only stores a forward index for the less common value.
       It starts off with the opposite of value of the first document
       and later checks and inverts itself, if more than 60% of all
       documents now have the indexed value. It does the inversion
       at 60% to avoid inverting itself constantly for an index that
       has a roughly equal 50/50 split.
    """

    meta_type = 'BooleanIndex'

    manage_options = (
        {
            'label': 'Settings',
            'action': 'manage_main'
        },
        {
            'label': 'Browse',
            'action': 'manage_browse'
        },
    )

    query_options = ['query']

    manage = manage_main = DTMLFile('dtml/manageBooleanIndex', globals())
    manage_main._setName('manage_main')
    manage_browse = DTMLFile('../dtml/browseIndex', globals())

    _index_value = 1
    _index_length = None

    def clear(self):
        self._index = IITreeSet()
        self._index_length = BTrees.Length.Length()
        self._index_value = 1
        self._unindex = IIBTree()
        self._length = BTrees.Length.Length()
        if self._counter is None:
            self._counter = BTrees.Length.Length()
        else:
            self._increment_counter()

    def histogram(self):
        """Return a mapping which provides a histogram of the number of
        elements found at each point in the index.
        """
        histogram = {}
        indexed = bool(self._index_value)
        histogram[indexed] = self._index_length.value
        histogram[not indexed] = self._length.value - self._index_length.value
        return histogram

    def _invert_index(self, documentId=None):
        self._index_value = indexed = int(not self._index_value)
        self._index.clear()
        length = 0
        for rid, value in self._unindex.iteritems():
            if value == indexed:
                self._index.add(rid)
                length += 1
        # documentId is the rid of the currently processed object that
        # triggered the invert. in the case of unindexing, the rid hasn't
        # been removed from the unindex yet. While indexing, the rid will
        # be added to the index and unindex after this method is done
        if documentId is not None:
            self._index.remove(documentId)
            length -= 1
        self._index_length = BTrees.Length.Length(length)

    def insertForwardIndexEntry(self, entry, documentId):
        """If the value matches the indexed one, insert into treeset
        """
        # When we get the first entry, decide to index the opposite of what
        # we got, as indexing zero items is fewer than one.
        if self._length.value == 0:
            self._index_value = int(not bool(entry))

        # if the added entry value is index value, insert it into index
        if bool(entry) is bool(self._index_value):
            self._index_length.change(1)
            self._index.insert(documentId)

        # insert value into global unindex (before computing index invert)
        self._unindex[documentId] = entry
        self._length.change(1)

        # is the index (after adding the current entry) larger than 60%
        # of the total length? than switch the indexed value
        if bool(entry) is bool(self._index_value):
            if (self._index_length.value) >= ((self._length.value) * 0.6):
                self._invert_index()

    def removeForwardIndexEntry(self, entry, documentId, check=True):
        """Take the entry provided and remove any reference to documentId
        in its entry in the index.
        """
        if bool(entry) is bool(self._index_value):
            try:
                self._index.remove(documentId)
                self._index_length.change(-1)
            except ConflictError:
                raise
            except Exception:
                LOG.exception(
                    '%(context)s: unindex_object could not '
                    'remove documentId %(doc_id)s from '
                    'index %(index)r. This should not '
                    'happen.',
                    dict(context=self.__class__.__name__,
                         doc_id=documentId,
                         index=self.id))
        elif check:
            # is the index (after removing the current entry) larger than
            # 60% of the total length? than switch the indexed value
            if (self._index_length.value) <= ((self._length.value - 1) * 0.6):
                self._invert_index(documentId)
                return

    def _index_object(self, documentId, obj, threshold=None, attr=''):
        """ index and object 'obj' with integer id 'documentId'"""
        returnStatus = 0

        # First we need to see if there's anything interesting to look at
        datum = self._get_object_datum(obj, attr)

        # Make it boolean, int as an optimization
        if datum is not _marker:
            datum = int(bool(datum))

        # We don't want to do anything that we don't have to here, so we'll
        # check to see if the new and existing information is the same.
        oldDatum = self._unindex.get(documentId, _marker)
        if datum != oldDatum:
            if oldDatum is not _marker:
                self.removeForwardIndexEntry(oldDatum, documentId, check=False)
                if datum is _marker:
                    try:
                        del self._unindex[documentId]
                        self._length.change(-1)
                    except ConflictError:
                        raise
                    except Exception:
                        LOG.error(
                            'Should not happen: oldDatum was there, now '
                            'its not, for document with id %s', documentId)

            if datum is not _marker:
                self.insertForwardIndexEntry(datum, documentId)

            returnStatus = 1

        return returnStatus

    def unindex_object(self, documentId):
        """ Unindex the object with integer id 'documentId' and don't
        raise an exception if we fail
        """
        unindexRecord = self._unindex.get(documentId, _marker)
        if unindexRecord is _marker:
            return None

        self._increment_counter()

        self.removeForwardIndexEntry(unindexRecord, documentId)

        try:
            del self._unindex[documentId]
            self._length.change(-1)
        except ConflictError:
            raise
        except Exception:
            LOG.debug('Attempt to unindex nonexistent document'
                      ' with id %s',
                      documentId,
                      exc_info=True)

    def query_index(self, record, resultset=None):
        index = self._index
        indexed = self._index_value

        for key in record.keys:
            if bool(key) is bool(indexed):
                # If we match the indexed value, check index
                return intersection(index, resultset)
            else:
                # Otherwise, remove from resultset or _unindex
                if resultset is None:
                    return union(difference(self._unindex, index), IISet([]))
                else:
                    return intersection(difference(resultset, index),
                                        self._unindex)
        return IISet()

    def indexSize(self):
        """Return distinct values, as an optimization we always claim 2."""
        return 2

    def items(self):
        # return a list of value to int set of rid tuples
        indexed = self._index_value
        items = [(bool(indexed), self._index)]
        false = IISet()
        for rid, value in self._unindex.iteritems():
            if value != indexed:
                false.add(rid)
        items.append((not bool(indexed), false))
        return items

    def uniqueValues(self, name=None, withLengths=0):
        """returns the unique values for name

        if withLengths is true, returns a sequence of
        tuples of (value, length)
        """
        if name is None:
            name = self.id
        elif name != self.id:
            return

        indexed = bool(self._index_value)
        unique_values = (indexed, not indexed)
        if not withLengths:
            for key in unique_values:
                yield key
        else:
            for key in unique_values:
                ilen = len(self._index)
                if key is indexed:
                    yield (key, ilen)
                else:
                    ulen = len(self._unindex)
                    yield (key, ulen - ilen)
Пример #10
0
class DateRangeIndex(UnIndex):
    """Index for date ranges, such as the "effective-expiration" range in CMF.

    Any object may return None for either the start or the end date: for the
    start date, this should be the logical equivalent of "since the beginning
    of time"; for the end date, "until the end of time".

    Therefore, divide the space of indexed objects into four containers:

    - Objects which always match (i.e., they returned None for both);

    - Objects which match after a given time (i.e., they returned None for the
      end date);

    - Objects which match until a given time (i.e., they returned None for the
      start date);

    - Objects which match only during a specific interval.
    """

    __implements__ = UnIndex.__implements__
    implements(IDateRangeIndex)

    security = ClassSecurityInfo()

    meta_type = "DateRangeIndex"

    manage_options = ({
        'label': 'Properties',
        'action': 'manage_indexProperties'
    }, )

    query_options = ['query']

    since_field = until_field = None

    def __init__(self,
                 id,
                 since_field=None,
                 until_field=None,
                 caller=None,
                 extra=None):

        if extra:
            since_field = extra.since_field
            until_field = extra.until_field

        self._setId(id)
        self._edit(since_field, until_field)
        self.clear()

    security.declareProtected(VIEW_PERMISSION, 'getSinceField')

    def getSinceField(self):
        """Get the name of the attribute indexed as start date.
        """
        return self._since_field

    security.declareProtected(VIEW_PERMISSION, 'getUntilField')

    def getUntilField(self):
        """Get the name of the attribute indexed as end date.
        """
        return self._until_field

    manage_indexProperties = DTMLFile('manageDateRangeIndex', _dtmldir)

    security.declareProtected(INDEX_MGMT_PERMISSION, 'manage_edit')

    def manage_edit(self, since_field, until_field, REQUEST):
        """
        """
        self._edit(since_field, until_field)
        REQUEST['RESPONSE'].redirect('%s/manage_main'
                                     '?manage_tabs_message=Updated' %
                                     REQUEST.get('URL2'))

    security.declarePrivate('_edit')

    def _edit(self, since_field, until_field):
        """
            Update the fields used to compute the range.
        """
        self._since_field = since_field
        self._until_field = until_field

    security.declareProtected(INDEX_MGMT_PERMISSION, 'clear')

    def clear(self):
        """
            Start over fresh.
        """
        self._always = IITreeSet()
        self._since_only = IOBTree()
        self._until_only = IOBTree()
        self._since = IOBTree()
        self._until = IOBTree()
        self._unindex = IOBTree()  # 'datum' will be a tuple of date ints
        self._length = BTrees.Length.Length()

    #
    #   PluggableIndexInterface implementation (XXX inherit assertions?)
    #
    def getEntryForObject(self, documentId, default=None):
        """
            Get all information contained for the specific object
            identified by 'documentId'.  Return 'default' if not found.
        """
        return self._unindex.get(documentId, default)

    def index_object(self, documentId, obj, threshold=None):
        """
            Index an object:

             - 'documentId' is the integer ID of the document

             - 'obj' is the object to be indexed

             - ignore threshold
        """
        if self._since_field is None:
            return 0

        since = getattr(obj, self._since_field, None)
        if safe_callable(since):
            since = since()
        since = self._convertDateTime(since)

        until = getattr(obj, self._until_field, None)
        if safe_callable(until):
            until = until()
        until = self._convertDateTime(until)

        datum = (since, until)

        old_datum = self._unindex.get(documentId, None)
        if datum == old_datum:  # No change?  bail out!
            return 0

        if old_datum is not None:
            old_since, old_until = old_datum
            self._removeForwardIndexEntry(old_since, old_until, documentId)

        self._insertForwardIndexEntry(since, until, documentId)
        self._unindex[documentId] = datum

        return 1

    def unindex_object(self, documentId):
        """
            Remove the object corresponding to 'documentId' from the index.
        """
        datum = self._unindex.get(documentId, None)

        if datum is None:
            return

        since, until = datum

        self._removeForwardIndexEntry(since, until, documentId)
        del self._unindex[documentId]

    def uniqueValues(self, name=None, withLengths=0):
        """
            Return a list of unique values for 'name'.

            If 'withLengths' is true, return a sequence of tuples, in
            the form '( value, length )'.
        """
        if not name in (self._since_field, self._until_field):
            return []

        if name == self._since_field:

            t1 = self._since
            t2 = self._since_only

        else:

            t1 = self._until
            t2 = self._until_only

        result = []
        IntType = type(0)

        if not withLengths:

            result.extend(t1.keys())
            result.extend(t2.keys())

        else:

            for key in t1.keys():
                set = t1[key]
                if type(set) is IntType:
                    length = 1
                else:
                    length = len(set)
                result.append((key, length))

            for key in t2.keys():
                set = t2[key]
                if type(set) is IntType:
                    length = 1
                else:
                    length = len(set)
                result.append((key, length))

        return tuple(result)

    def _apply_index(self, request, cid=''):
        """
            Apply the index to query parameters given in 'request', which
            should be a mapping object.

            If the request does not contain the needed parametrs, then
            return None.

            If the request contains a parameter with the name of the
            column + "_usage", snif for information on how to handle
            applying the index.

            Otherwise return two objects.  The first object is a ResultSet
            containing the record numbers of the matching records.  The
            second object is a tuple containing the names of all data fields
            used.
        """
        record = parseIndexRequest(request, self.getId())
        if record.keys is None:
            return None

        term = self._convertDateTime(record.keys[0])

        #
        #   Aggregate sets for each bucket separately, to avoid
        #   large-small union penalties.
        #
        #until_only  = IISet()
        #map( until_only.update, self._until_only.values( term ) )
        # XXX use multi-union
        until_only = multiunion(self._until_only.values(term))

        #since_only  = IISet()
        #map( since_only.update, self._since_only.values( None, term ) )
        # XXX use multi-union
        since_only = multiunion(self._since_only.values(None, term))

        #until       = IISet()
        #map( until.update, self._until.values( term ) )
        # XXX use multi-union
        until = multiunion(self._until.values(term))

        #since       = IISet()
        #map( since.update, self._since.values( None, term ) )
        # XXX use multi-union
        since = multiunion(self._since.values(None, term))

        bounded = intersection(until, since)

        #   Merge from smallest to largest.
        #result      = union( self._always, until_only )
        result = union(bounded, until_only)
        result = union(result, since_only)
        #result      = union( result, bounded )
        result = union(result, self._always)

        return result, (self._since_field, self._until_field)

    #
    #   ZCatalog needs this, although it isn't (yet) part of the interface.
    #
    security.declareProtected(VIEW_PERMISSION, 'numObjects')

    def numObjects(self):
        """ """
        return len(self._unindex)

    def indexSize(self):
        """ """
        return len(self)

    #
    #   Helper functions.
    #
    def _insertForwardIndexEntry(self, since, until, documentId):
        """
            Insert 'documentId' into the appropriate set based on
            'datum'.
        """
        if since is None and until is None:

            self._always.insert(documentId)

        elif since is None:

            set = self._until_only.get(until, None)
            if set is None:
                set = self._until_only[until] = IISet()  # XXX: Store an int?
            set.insert(documentId)

        elif until is None:

            set = self._since_only.get(since, None)
            if set is None:
                set = self._since_only[since] = IISet()  # XXX: Store an int?
            set.insert(documentId)

        else:

            set = self._since.get(since, None)
            if set is None:
                set = self._since[since] = IISet()  # XXX: Store an int?
            set.insert(documentId)

            set = self._until.get(until, None)
            if set is None:
                set = self._until[until] = IISet()  # XXX: Store an int?
            set.insert(documentId)

    def _removeForwardIndexEntry(self, since, until, documentId):
        """
            Remove 'documentId' from the appropriate set based on
            'datum'.
        """
        if since is None and until is None:

            self._always.remove(documentId)

        elif since is None:

            set = self._until_only.get(until, None)
            if set is not None:

                set.remove(documentId)

                if not set:
                    del self._until_only[until]

        elif until is None:

            set = self._since_only.get(since, None)
            if set is not None:

                set.remove(documentId)

                if not set:
                    del self._since_only[since]

        else:

            set = self._since.get(since, None)
            if set is not None:
                set.remove(documentId)

                if not set:
                    del self._since[since]

            set = self._until.get(until, None)
            if set is not None:
                set.remove(documentId)

                if not set:
                    del self._until[until]

    def _convertDateTime(self, value):
        if value is None:
            return value
        if type(value) == type(''):
            dt_obj = DateTime(value)
            value = dt_obj.millis() / 1000 / 60  # flatten to minutes
        if isinstance(value, DateTime):
            value = value.millis() / 1000 / 60  # flatten to minutes
        result = int(value)
        if isinstance(result, long):  # this won't work (Python 2.3)
            raise OverflowError('%s is not within the range of dates allowed'
                                'by a DateRangeIndex' % value)
        return result
Пример #11
0
class DateRangeIndex(UnIndex):
    """Index for date ranges, such as the "effective-expiration" range in CMF.

    Any object may return None for either the start or the end date: for the
    start date, this should be the logical equivalent of "since the beginning
    of time"; for the end date, "until the end of time".

    Therefore, divide the space of indexed objects into four containers:

    - Objects which always match (i.e., they returned None for both);

    - Objects which match after a given time (i.e., they returned None for the
      end date);

    - Objects which match until a given time (i.e., they returned None for the
      start date);

    - Objects which match only during a specific interval.
    """
    implements(IDateRangeIndex)

    security = ClassSecurityInfo()

    meta_type = "DateRangeIndex"
    query_options = ('query', )

    manage_options = ({
        'label': 'Properties',
        'action': 'manage_indexProperties'
    }, )

    since_field = until_field = None

    # int(DateTime('1000/1/1 0:00 GMT-12').millis() / 1000 / 60)
    floor_value = -510162480
    # int(DateTime('2499/12/31 0:00 GMT+12').millis() / 1000 / 60)
    ceiling_value = 278751600

    def __init__(self,
                 id,
                 since_field=None,
                 until_field=None,
                 caller=None,
                 extra=None,
                 floor_value=None,
                 ceiling_value=None):

        if extra:
            since_field = extra.since_field
            until_field = extra.until_field
            floor_value = getattr(extra, 'floor_value', None)
            ceiling_value = getattr(extra, 'ceiling_value', None)

        self._setId(id)
        self._edit(since_field, until_field, floor_value, ceiling_value)
        self.clear()

    security.declareProtected(view, 'getSinceField')

    def getSinceField(self):
        """Get the name of the attribute indexed as start date.
        """
        return self._since_field

    security.declareProtected(view, 'getUntilField')

    def getUntilField(self):
        """Get the name of the attribute indexed as end date.
        """
        return self._until_field

    security.declareProtected(view, 'getFloorValue')

    def getFloorValue(self):
        """"""
        return self.floor_value

    security.declareProtected(view, 'getCeilingValue')

    def getCeilingValue(self):
        """"""
        return self.ceiling_value

    manage_indexProperties = DTMLFile('manageDateRangeIndex', _dtmldir)

    security.declareProtected(manage_zcatalog_indexes, 'manage_edit')

    def manage_edit(self, since_field, until_field, floor_value, ceiling_value,
                    REQUEST):
        """
        """
        self._edit(since_field, until_field, floor_value, ceiling_value)
        REQUEST['RESPONSE'].redirect('%s/manage_main'
                                     '?manage_tabs_message=Updated' %
                                     REQUEST.get('URL2'))

    security.declarePrivate('_edit')

    def _edit(self,
              since_field,
              until_field,
              floor_value=None,
              ceiling_value=None):
        """Update the fields used to compute the range.
        """
        self._since_field = since_field
        self._until_field = until_field
        if floor_value is not None:
            self.floor_value = int(floor_value)
        if ceiling_value is not None:
            self.ceiling_value = int(ceiling_value)

    security.declareProtected(manage_zcatalog_indexes, 'clear')

    def clear(self):
        """
            Start over fresh.
        """
        self._always = IITreeSet()
        self._since_only = IOBTree()
        self._until_only = IOBTree()
        self._since = IOBTree()
        self._until = IOBTree()
        self._unindex = IOBTree()  # 'datum' will be a tuple of date ints
        self._length = Length()

    #
    #   PluggableIndexInterface implementation (XXX inherit assertions?)
    #
    def getEntryForObject(self, documentId, default=None):
        """
            Get all information contained for the specific object
            identified by 'documentId'.  Return 'default' if not found.
        """
        return self._unindex.get(documentId, default)

    def index_object(self, documentId, obj, threshold=None):
        """
            Index an object:

             - 'documentId' is the integer ID of the document

             - 'obj' is the object to be indexed

             - ignore threshold
        """
        if self._since_field is None:
            return 0

        since = getattr(obj, self._since_field, None)
        if safe_callable(since):
            since = since()
        since = self._convertDateTime(since)

        until = getattr(obj, self._until_field, None)
        if safe_callable(until):
            until = until()
        until = self._convertDateTime(until)

        datum = (since, until)

        old_datum = self._unindex.get(documentId, None)
        if datum == old_datum:  # No change?  bail out!
            return 0

        if old_datum is not None:
            old_since, old_until = old_datum
            self._removeForwardIndexEntry(old_since, old_until, documentId)

        self._insertForwardIndexEntry(since, until, documentId)
        self._unindex[documentId] = datum

        return 1

    def unindex_object(self, documentId):
        """
            Remove the object corresponding to 'documentId' from the index.
        """
        datum = self._unindex.get(documentId, None)
        if datum is None:
            return

        since, until = datum
        self._removeForwardIndexEntry(since, until, documentId)
        del self._unindex[documentId]

    def uniqueValues(self, name=None, withLengths=0):
        """
            Return a list of unique values for 'name'.

            If 'withLengths' is true, return a sequence of tuples, in
            the form '(value, length)'.
        """
        if not name in (self._since_field, self._until_field):
            return []

        if name == self._since_field:
            t1 = self._since
            t2 = self._since_only
        else:
            t1 = self._until
            t2 = self._until_only

        result = []
        if not withLengths:
            result.extend(t1.keys())
            result.extend(t2.keys())
        else:
            for key in t1.keys():
                set = t1[key]
                if isinstance(set, int):
                    length = 1
                else:
                    length = len(set)
                result.append((key, length))
            for key in t2.keys():
                set = t2[key]
                if isinstance(set, int):
                    length = 1
                else:
                    length = len(set)
                result.append((key, length))
        return tuple(result)

    def _cache_key(self, catalog):
        cid = catalog.getId()
        counter = getattr(aq_base(catalog), 'getCounter', None)
        if counter is not None:
            return '%s_%s' % (cid, counter())
        return cid

    def _apply_index(self, request, resultset=None):
        """
            Apply the index to query parameters given in 'request', which
            should be a mapping object.

            If the request does not contain the needed parameters, then
            return None.

            Otherwise return two objects.  The first object is a ResultSet
            containing the record numbers of the matching records.  The
            second object is a tuple containing the names of all data fields
            used.
        """
        iid = self.id
        record = parseIndexRequest(request, iid, self.query_options)
        if record.keys is None:
            return None

        term = self._convertDateTime(record.keys[0])
        REQUEST = aq_get(self, 'REQUEST', None)
        if REQUEST is not None:
            catalog = aq_parent(aq_parent(aq_inner(self)))
            if catalog is not None:
                key = self._cache_key(catalog)
                cache = REQUEST.get(key, None)
                tid = isinstance(term, int) and term / 10 or 'None'
                if resultset is None:
                    cachekey = '_daterangeindex_%s_%s' % (iid, tid)
                else:
                    cachekey = '_daterangeindex_inverse_%s_%s' % (iid, tid)
                if cache is None:
                    cache = REQUEST[key] = RequestCache()
                else:
                    cached = cache.get(cachekey, None)
                    if cached is not None:
                        if resultset is None:
                            return (cached, (self._since_field,
                                             self._until_field))
                        else:
                            return (difference(resultset, cached),
                                    (self._since_field, self._until_field))

        if resultset is None:
            # Aggregate sets for each bucket separately, to avoid
            # large-small union penalties.
            until_only = multiunion(self._until_only.values(term))
            since_only = multiunion(self._since_only.values(None, term))
            until = multiunion(self._until.values(term))

            # Total result is bound by resultset
            if REQUEST is None:
                until = intersection(resultset, until)

            since = multiunion(self._since.values(None, term))
            bounded = intersection(until, since)

            # Merge from smallest to largest.
            result = multiunion(
                [bounded, until_only, since_only, self._always])
            if REQUEST is not None and catalog is not None:
                cache[cachekey] = result

            return (result, (self._since_field, self._until_field))
        else:
            # Compute the inverse and subtract from res
            until_only = multiunion(self._until_only.values(None, term - 1))
            since_only = multiunion(self._since_only.values(term + 1))
            until = multiunion(self._until.values(None, term - 1))
            since = multiunion(self._since.values(term + 1))

            result = multiunion([until_only, since_only, until, since])
            if REQUEST is not None and catalog is not None:
                cache[cachekey] = result

            return (difference(resultset,
                               result), (self._since_field, self._until_field))

    def _insert_migrate(self, tree, key, value):
        treeset = tree.get(key, None)
        if treeset is None:
            tree[key] = value
        else:
            if isinstance(treeset, int):
                tree[key] = IITreeSet((treeset, value))
            elif isinstance(treeset, IISet):
                tree[key] = IITreeSet(treeset)
                tree[key].insert(value)
            else:
                treeset.insert(value)

    def _insertForwardIndexEntry(self, since, until, documentId):
        """Insert 'documentId' into the appropriate set based on 'datum'.
        """
        if since is None and until is None:
            self._always.insert(documentId)
        elif since is None:
            self._insert_migrate(self._until_only, until, documentId)
        elif until is None:
            self._insert_migrate(self._since_only, since, documentId)
        else:
            self._insert_migrate(self._since, since, documentId)
            self._insert_migrate(self._until, until, documentId)

    def _remove_delete(self, tree, key, value):
        treeset = tree.get(key, None)
        if treeset is not None:
            if isinstance(treeset, int):
                del tree[key]
            else:
                treeset.remove(value)
                if not treeset:
                    del tree[key]

    def _removeForwardIndexEntry(self, since, until, documentId):
        """Remove 'documentId' from the appropriate set based on 'datum'.
        """
        if since is None and until is None:
            self._always.remove(documentId)
        elif since is None:
            self._remove_delete(self._until_only, until, documentId)
        elif until is None:
            self._remove_delete(self._since_only, since, documentId)
        else:
            self._remove_delete(self._since, since, documentId)
            self._remove_delete(self._until, until, documentId)

    def _convertDateTime(self, value):
        if value is None:
            return value
        if isinstance(value, (str, datetime)):
            dt_obj = DateTime(value)
            value = dt_obj.millis() / 1000 / 60  # flatten to minutes
        elif isinstance(value, DateTime):
            value = value.millis() / 1000 / 60  # flatten to minutes
        if value > MAX32 or value < -MAX32:
            # t_val must be integer fitting in the 32bit range
            raise OverflowError('%s is not within the range of dates allowed'
                                'by a DateRangeIndex' % value)
        value = int(value)
        # handle values outside our specified range
        if value > self.ceiling_value:
            return None
        elif value < self.floor_value:
            return None
        return value
Пример #12
0
class MessageStorage(Persistent, Location):
    interface.implements(IMessageStorage)

    notify = True
    principalId = None

    def __init__(self, principalId):
        self.index = OIBTree()
        self.messages = IOBTree()
        self.services = OOBTree()
        self.readstatus = IITreeSet()
        self.principalId = principalId

        self._next = Length(1)

    @Lazy
    def readstatus(self):
        self.readstatus = IITreeSet()
        return self.readstatus

    @property
    def principal(self):
        try:
            return getUtility(IAuthentication).getPrincipal(self.principalId)
        except:
            return None

    @property
    def unread(self):
        unread = 0
        for serviceId in self.services.keys():
            service = self.getService(serviceId)
            unread = unread + service.unread()
        return unread

    def getMessage(self, messageId):
        return self.messages.get(messageId)

    def getServiceIds(self):
        return list(self.services.keys())

    def getService(self, serviceId):
        service = self.services.get(serviceId)

        if not IMessageService.providedBy(service):
            factory = getUtility(IMessageServiceFactory, serviceId)
            service = factory(self)
            self.services[serviceId] = service

        return service

    def create(self, serviceId, **data):
        """ create and append message to storage """
        id = self._next()
        self._next.change(1)

        service = self.getService(serviceId)

        msg = service.create(**data)
        date = datetime.now(ITZInfo(self.principal, pytz.utc))

        while date in self.index:
            date = date + timedelta

        msg.__id__ = id
        msg.__date__ = date

        self.index[date] = id
        self.messages[id] = msg
        self.readstatus.insert(id)

        service.append(msg)

        event.notify(MessageCreatedEvent(msg, self))
        return id

    def remove(self, messageId):
        message = self.messages.get(messageId)

        if message is None:
            return
        else:
            self.clearReadStatus(message)

            del self.index[message.__date__]
            del self.messages[message.__id__]

            for serviceId in self.services.keys():
                service = self.getService(serviceId)
                service.remove(message)

            event.notify(MessageRemovedEvent(message, self))

    def readStatus(self, message):
        return message.__id__ in self.readstatus

    def clearReadStatus(self, message):
        if message.__id__ not in self.readstatus:
            return

        idx = message.__date__
        for serviceId in self.services.keys():
            service = self.getService(serviceId)
            if idx in service.index and service.unread() > 0:
                service.unread.change(-1)

        self.readstatus.remove(message.__id__)
Пример #13
0
class DateRangeIndex(UnIndex):
    """
        Index a date range, such as the canonical "effective-expiration"
        range in the CMF.  Any object may return None for either the
        start or the end date:  for the start date, this should be
        the logical equivalent of "since the beginning of time";  for the
        end date, "until the end of time".

        Therefore, divide the space of indexed objects into four containers:

        - Objects which always match ( i.e., they returned None for both );

        - Objects which match after a given time ( i.e., they returned None
          for the end date );

        - Objects which match until a given time ( i.e., they returned None
          for the start date );

        - Objects which match only during a specific interval.
    """
    __implements__ = ( PluggableIndex.PluggableIndexInterface, )

    security = ClassSecurityInfo()

    meta_type = "DateRangeIndex"

    manage_options= ( { 'label'     : 'Properties'
                      , 'action'    : 'manage_indexProperties'
                      }
                    ,
                    )

    query_options = ['query']

    since_field = until_field = None

    def __init__(self, id, since_field=None, until_field=None,
            caller=None, extra=None):

        if extra:
            since_field = extra.since_field
            until_field = extra.until_field

        self._setId(id)
        self._edit(since_field, until_field)
        self.clear()

    security.declareProtected( VIEW_PERMISSION
                             , 'getSinceField'
                             )
    def getSinceField( self ):
        """
        """
        return self._since_field

    security.declareProtected( VIEW_PERMISSION
                             , 'getUntilField'
                             )
    def getUntilField( self ):
        """
        """
        return self._until_field

    manage_indexProperties = DTMLFile( 'manageDateRangeIndex', _dtmldir )

    security.declareProtected( INDEX_MGMT_PERMISSION
                             , 'manage_edit'
                             )
    def manage_edit( self, since_field, until_field, REQUEST ):
        """
        """
        self._edit( since_field, until_field )
        REQUEST[ 'RESPONSE' ].redirect( '%s/manage_main'
                                        '?manage_tabs_message=Updated'
                                      % REQUEST.get('URL2')
                                      )

    security.declarePrivate( '_edit' )
    def _edit( self, since_field, until_field ):
        """
            Update the fields used to compute the range.
        """
        self._since_field = since_field
        self._until_field = until_field


    security.declareProtected( INDEX_MGMT_PERMISSION
                             , 'clear'
                             )
    def clear( self ):
        """
            Start over fresh.
        """
        self._always        = IITreeSet()
        self._since_only    = IOBTree()
        self._until_only    = IOBTree()
        self._since         = IOBTree()
        self._until         = IOBTree()
        self._unindex       = IOBTree() # 'datum' will be a tuple of date ints

    #
    #   PluggableIndexInterface implementation (XXX inherit assertions?)
    #
    def getEntryForObject( self, documentId, default=None ):
        """
            Get all information contained for the specific object
            identified by 'documentId'.  Return 'default' if not found.
        """
        return self._unindex.get( documentId, default )

    def index_object( self, documentId, obj, threshold=None ):
        """
            Index an object:

             - 'documentId' is the integer ID of the document

             - 'obj' is the object to be indexed

             - ignore threshold
        """
        if self._since_field is None:
            return 0

        since = getattr( obj, self._since_field, None )
        if callable( since ):
            since = since()
        since = self._convertDateTime( since )

        until = getattr( obj, self._until_field, None )
        if callable( until ):
            until = until()
        until = self._convertDateTime( until )

        datum = ( since, until )

        old_datum = self._unindex.get( documentId, None )
        if datum == old_datum: # No change?  bail out!
            return 0

        if old_datum is not None:
            old_since, old_until = old_datum
            self._removeForwardIndexEntry( old_since, old_until, documentId )

        self._insertForwardIndexEntry( since, until, documentId )
        self._unindex[ documentId ] = datum

        return 1

    def unindex_object( self, documentId ):
        """
            Remove the object corresponding to 'documentId' from the index.
        """
        datum = self._unindex.get( documentId, None )

        if datum is None:
            return

        since, until = datum

        self._removeForwardIndexEntry( since, until, documentId )
        del self._unindex[ documentId ]

    def uniqueValues( self, name=None, withLengths=0 ):
        """
            Return a list of unique values for 'name'.

            If 'withLengths' is true, return a sequence of tuples, in
            the form '( value, length )'.
        """
        if not name in ( self._since_field, self._until_field ):
            return []

        if name == self._since_field:

            t1 = self._since
            t2 = self._since_only

        else:

            t1 = self._until
            t2 = self._until_only

        result = []
        IntType = type( 0 )

        if not withValues:

            result.extend( t1.keys() )
            result.extend( t2.keys() )

        else:

            for key in t1.keys():
                set = t1[ key ]
                if type( set ) is IntType:
                    length = 1
                else:
                    length = len( set )
                result.append( ( key, length) )

            for key in t2.keys():
                set = t2[ key ]
                if type( set ) is IntType:
                    length = 1
                else:
                    length = len( set )
                result.append( ( key, length) )

        return tuple( result )

    def _apply_index( self, request, cid='' ):
        """
            Apply the index to query parameters given in 'request', which
            should be a mapping object.

            If the request does not contain the needed parametrs, then
            return None.

            If the request contains a parameter with the name of the
            column + "_usage", snif for information on how to handle
            applying the index.

            Otherwise return two objects.  The first object is a ResultSet
            containing the record numbers of the matching records.  The
            second object is a tuple containing the names of all data fields
            used.
        """
        record = parseIndexRequest( request, self.getId() )
        if record.keys is None:
            return None

        term        = self._convertDateTime( record.keys[0] )

        #
        #   Aggregate sets for each bucket separately, to avoid
        #   large-small union penalties.
        #
        #until_only  = IISet()
        #map( until_only.update, self._until_only.values( term ) )
        # XXX use multi-union
        until_only = multiunion( self._until_only.values( term ) )

        #since_only  = IISet()
        #map( since_only.update, self._since_only.values( None, term ) )
        # XXX use multi-union
        since_only = multiunion( self._since_only.values( None, term ) )

        #until       = IISet()
        #map( until.update, self._until.values( term ) )
        # XXX use multi-union
        until = multiunion( self._until.values( term ) )

        #since       = IISet()
        #map( since.update, self._since.values( None, term ) )
        # XXX use multi-union
        since = multiunion( self._since.values( None, term ) )

        bounded     = intersection( until, since )

        #   Merge from smallest to largest.
        #result      = union( self._always, until_only )
        result      = union( bounded, until_only )
        result      = union( result, since_only )
        #result      = union( result, bounded )
        result      = union( result, self._always )

        return result, ( self._since_field, self._until_field )

    #
    #   ZCatalog needs this, although it isn't (yet) part of the interface.
    #
    security.declareProtected( VIEW_PERMISSION , 'numObjects' )
    def numObjects( self ):
        """
        """
        return len( self._unindex )

    #
    #   Helper functions.
    #
    def _insertForwardIndexEntry( self, since, until, documentId ):
        """
            Insert 'documentId' into the appropriate set based on
            'datum'.
        """
        if since is None and until is None:

            self._always.insert( documentId )

        elif since is None:

            set = self._until_only.get( until, None )
            if set is None:
                set = self._until_only[ until ] = IISet()  # XXX: Store an int?
            set.insert( documentId )

        elif until is None:

            set = self._since_only.get( since, None )
            if set is None:
                set = self._since_only[ since ] = IISet()  # XXX: Store an int?
            set.insert( documentId )

        else:

            set = self._since.get( since, None )
            if set is None:
                set = self._since[ since ] = IISet()   # XXX: Store an int?
            set.insert( documentId )

            set = self._until.get( until, None )
            if set is None:
                set = self._until[ until ] = IISet() # XXX: Store an int?
            set.insert( documentId )

    def _removeForwardIndexEntry( self, since, until, documentId ):
        """
            Remove 'documentId' from the appropriate set based on
            'datum'.
        """
        if since is None and until is None:

            self._always.remove( documentId )

        elif since is None:

            set = self._until_only.get( until, None )
            if set is not None:

                set.remove( documentId )

                if not set:
                    del self._until_only[ until ]

        elif until is None:

            set = self._since_only.get( since, None )
            if set is not None:

                set.remove( documentId )

                if not set:
                    del self._since_only[ since ]

        else:

            set = self._since.get( since, None )
            if set is not None:
                set.remove( documentId )

                if not set:
                    del self._since[ since ]

            set = self._until.get( until, None )
            if set is not None:
                set.remove( documentId )

                if not set:
                    del self._until[ until ]

    def _convertDateTime( self, value ):
        if value is None:
            return value
        if type( value ) == type( '' ):
            dt_obj = DateTime( value )
            value = dt_obj.millis() / 1000 / 60 # flatten to minutes
        if isinstance( value, DateTime ):
            value = value.millis() / 1000 / 60 # flatten to minutes
        return int( value )
Пример #14
0
class BooleanIndex(UnIndex):
    """Index for booleans

       self._index = set([documentId1, documentId2])
       self._unindex = {documentId:[True/False]}

       False doesn't have actual entries in _index.
    """

    meta_type = "BooleanIndex"

    manage_options = (
        {
            'label': 'Settings',
            'action': 'manage_main'
        },
        {
            'label': 'Browse',
            'action': 'manage_browse'
        },
    )

    query_options = ["query"]

    manage = manage_main = DTMLFile('dtml/manageBooleanIndex', globals())
    manage_main._setName('manage_main')
    manage_browse = DTMLFile('../dtml/browseIndex', globals())

    def clear(self):
        self._length = BTrees.Length.Length()
        self._index = IITreeSet()
        self._unindex = IIBTree()

    def insertForwardIndexEntry(self, entry, documentId):
        """If True, insert directly into treeset
        """
        if entry:
            self._index.insert(documentId)
            self._length.change(1)

    def removeForwardIndexEntry(self, entry, documentId):
        """Take the entry provided and remove any reference to documentId
        in its entry in the index.
        """
        try:
            if entry:
                self._index.remove(documentId)
                self._length.change(-1)
        except ConflictError:
            raise
        except Exception:
            LOG.exception(
                '%s: unindex_object could not remove '
                'documentId %s from index %s. This '
                'should not happen.' %
                (self.__class__.__name__, str(documentId), str(self.id)))

    def _index_object(self, documentId, obj, threshold=None, attr=''):
        """ index and object 'obj' with integer id 'documentId'"""
        returnStatus = 0

        # First we need to see if there's anything interesting to look at
        datum = self._get_object_datum(obj, attr)

        # Make it boolean, int as an optimization
        if datum is not _marker:
            datum = int(bool(datum))

        # We don't want to do anything that we don't have to here, so we'll
        # check to see if the new and existing information is the same.
        oldDatum = self._unindex.get(documentId, _marker)
        if datum != oldDatum:
            if oldDatum is not _marker:
                self.removeForwardIndexEntry(oldDatum, documentId)
                if datum is _marker:
                    try:
                        del self._unindex[documentId]
                    except ConflictError:
                        raise
                    except Exception:
                        LOG.error('Should not happen: oldDatum was there, now '
                                  'its not, for document with id %s' %
                                  documentId)

            if datum is not _marker:
                if datum:
                    self.insertForwardIndexEntry(datum, documentId)
                self._unindex[documentId] = datum

            returnStatus = 1

        return returnStatus

    def _apply_index(self, request, resultset=None):
        record = parseIndexRequest(request, self.id, self.query_options)
        if record.keys is None:
            return None

        index = self._index

        for key in record.keys:
            if key:
                # If True, check index
                return (intersection(index, resultset), (self.id, ))
            else:
                # Otherwise, remove from resultset or _unindex
                if resultset is None:
                    return (union(difference(self._unindex, index),
                                  IISet([])), (self.id, ))
                else:
                    return (difference(resultset, index), (self.id, ))
        return (IISet(), (self.id, ))

    def indexSize(self):
        """Return distinct values, as an optimization we always claim 2."""
        return 2

    def items(self):
        items = []
        for v, k in self._unindex.items():
            if isinstance(v, int):
                v = IISet((v, ))
            items.append((k, v))
        return items
class DateRangeIndex(UnIndex):

    """Index for date ranges, such as the "effective-expiration" range in CMF.

    Any object may return None for either the start or the end date: for the
    start date, this should be the logical equivalent of "since the beginning
    of time"; for the end date, "until the end of time".

    Therefore, divide the space of indexed objects into four containers:

    - Objects which always match (i.e., they returned None for both);

    - Objects which match after a given time (i.e., they returned None for the
      end date);

    - Objects which match until a given time (i.e., they returned None for the
      start date);

    - Objects which match only during a specific interval.
    """

    security = ClassSecurityInfo()

    meta_type = "DateRangeIndex"
    query_options = ('query', )

    manage_options = ({'label': 'Properties',
                       'action': 'manage_indexProperties'},
                      )

    since_field = until_field = None

    # int(DateTime('1000/1/1 0:00 GMT-12').millis() / 1000 / 60)
    floor_value = -510162480
    # int(DateTime('2499/12/31 0:00 GMT+12').millis() / 1000 / 60)
    ceiling_value = 278751600

    def __init__(self, id, since_field=None, until_field=None,
                 caller=None, extra=None,
                 floor_value=None, ceiling_value=None):

        if extra:
            since_field = extra.since_field
            until_field = extra.until_field
            floor_value = getattr(extra, 'floor_value', None)
            ceiling_value = getattr(extra, 'ceiling_value', None)

        self._setId(id)
        self._edit(since_field, until_field, floor_value, ceiling_value)
        self.clear()

    security.declareProtected(view, 'getSinceField')
    def getSinceField(self):
        """Get the name of the attribute indexed as start date.
        """
        return self._since_field

    security.declareProtected(view, 'getUntilField')
    def getUntilField(self):
        """Get the name of the attribute indexed as end date.
        """
        return self._until_field

    security.declareProtected(view, 'getFloorValue')
    def getFloorValue(self):
        """ """
        return self.floor_value

    security.declareProtected(view, 'getCeilingValue')
    def getCeilingValue(self):
        """ """
        return self.ceiling_value

    manage_indexProperties = DTMLFile('manageDateRangeIndex', _dtmldir)

    security.declareProtected(manage_zcatalog_indexes, 'manage_edit')
    def manage_edit(self, since_field, until_field, floor_value,
                    ceiling_value, REQUEST):
        """ """
        self._edit(since_field, until_field, floor_value, ceiling_value)
        REQUEST['RESPONSE'].redirect('%s/manage_main'
                                     '?manage_tabs_message=Updated'
                                     % REQUEST.get('URL2'))

    security.declarePrivate('_edit')
    def _edit(self, since_field, until_field, floor_value=None,
              ceiling_value=None):
        """Update the fields used to compute the range.
        """
        self._since_field = since_field
        self._until_field = until_field
        if floor_value is not None:
            self.floor_value = int(floor_value)
        if ceiling_value is not None:
            self.ceiling_value = int(ceiling_value)

    security.declareProtected(manage_zcatalog_indexes, 'clear')
    def clear(self):
        """Start over fresh."""
        self._always = IITreeSet()
        self._since_only = IOBTree()
        self._until_only = IOBTree()
        self._since = IOBTree()
        self._until = IOBTree()
        self._unindex = IOBTree()  # 'datum' will be a tuple of date ints
        self._length = Length()
        if self._counter is None:
            self._counter = Length()
        else:
            self._increment_counter()

    def getEntryForObject(self, documentId, default=None):
        """Get all information contained for the specific object
        identified by 'documentId'.  Return 'default' if not found.
        """
        return self._unindex.get(documentId, default)

    def index_object(self, documentId, obj, threshold=None):
        """Index an object:
        - 'documentId' is the integer ID of the document
        - 'obj' is the object to be indexed
        - ignore threshold
        """
        if self._since_field is None:
            return 0

        since = getattr(obj, self._since_field, None)
        if safe_callable(since):
            since = since()
        since = self._convertDateTime(since)

        until = getattr(obj, self._until_field, None)
        if safe_callable(until):
            until = until()
        until = self._convertDateTime(until)

        datum = (since, until)

        old_datum = self._unindex.get(documentId, None)
        if datum == old_datum:  # No change?  bail out!
            return 0

        self._increment_counter()

        if old_datum is not None:
            old_since, old_until = old_datum
            self._removeForwardIndexEntry(old_since, old_until, documentId)

        self._insertForwardIndexEntry(since, until, documentId)
        self._unindex[documentId] = datum

        return 1

    def unindex_object(self, documentId):
        """Remove the object corresponding to 'documentId' from the index.
        """

        datum = self._unindex.get(documentId, None)
        if datum is None:
            return

        self._increment_counter()

        since, until = datum
        self._removeForwardIndexEntry(since, until, documentId)
        del self._unindex[documentId]

    def uniqueValues(self, name=None, withLengths=0):
        """Return a sequence of unique values for 'name'.

        If 'withLengths' is true, return a sequence of tuples, in
        the form '(value, length)'.
        """
        if name not in (self._since_field, self._until_field):
            raise StopIteration

        if name == self._since_field:
            sets = (self._since, self._since_only)
        else:
            sets = (self._until, self._until_only)

        if not withLengths:
            for s in sets:
                for key in s.keys():
                    yield key
        else:
            for s in sets:
                for key, value in s.items():
                    if isinstance(value, int):
                        yield (key, 1)
                    else:
                        yield (key, len(value))

    def getRequestCacheKey(self, record, resultset=None):
        term = self._convertDateTime(record.keys[0])
        tid = str(term)

        # unique index identifier
        iid = '_%s_%s_%s' % (self.__class__.__name__,
                             self.id, self.getCounter())
        # record identifier
        if resultset is None:
            rid = '_%s' % (tid, )
        else:
            rid = '_inverse_%s' % (tid, )

        return (iid, rid)

    def _apply_index(self, request, resultset=None):
        record = IndexQuery(request, self.id, self.query_options,
                            self.operators, self.useOperator)
        if record.keys is None:
            return None
        return (self.query_index(record, resultset=resultset),
                (self._since_field, self._until_field))

    def query_index(self, record, resultset=None):
        cache = self.getRequestCache()
        if cache is not None:
            cachekey = self.getRequestCacheKey(record, resultset)
            cached = cache.get(cachekey, None)
            if cached is not None:
                if resultset is None:
                    return cached
                else:
                    return difference(resultset, cached)

        term = self._convertDateTime(record.keys[0])
        if resultset is None:
            # Aggregate sets for each bucket separately, to avoid
            # large-small union penalties.
            until_only = multiunion(self._until_only.values(term))
            since_only = multiunion(self._since_only.values(None, term))
            until = multiunion(self._until.values(term))
            since = multiunion(self._since.values(None, term))
            bounded = intersection(until, since)

            # Merge from smallest to largest.
            result = multiunion([bounded, until_only, since_only,
                                 self._always])
            if cache is not None:
                cache[cachekey] = result

            return result
        else:
            # Compute the inverse and subtract from res
            until_only = multiunion(self._until_only.values(None, term - 1))
            since_only = multiunion(self._since_only.values(term + 1))
            until = multiunion(self._until.values(None, term - 1))
            since = multiunion(self._since.values(term + 1))

            result = multiunion([since, since_only, until_only, until])
            if cache is not None:
                cache[cachekey] = result

            return difference(resultset, result)

    def _insert_migrate(self, tree, key, value):
        treeset = tree.get(key, None)
        if treeset is None:
            tree[key] = IITreeSet((value, ))
        else:
            if isinstance(treeset, IITreeSet):
                treeset.insert(value)
            elif isinstance(treeset, int):
                tree[key] = IITreeSet((treeset, value))
            else:
                tree[key] = IITreeSet(treeset)
                tree[key].insert(value)

    def _insertForwardIndexEntry(self, since, until, documentId):
        """Insert 'documentId' into the appropriate set based on 'datum'.
        """
        if since is None and until is None:
            self._always.insert(documentId)
        elif since is None:
            self._insert_migrate(self._until_only, until, documentId)
        elif until is None:
            self._insert_migrate(self._since_only, since, documentId)
        else:
            self._insert_migrate(self._since, since, documentId)
            self._insert_migrate(self._until, until, documentId)

    def _remove_delete(self, tree, key, value):
        treeset = tree.get(key, None)
        if treeset is not None:
            if isinstance(treeset, int):
                del tree[key]
            else:
                treeset.remove(value)
                if not treeset:
                    del tree[key]

    def _removeForwardIndexEntry(self, since, until, documentId):
        """Remove 'documentId' from the appropriate set based on 'datum'.
        """
        if since is None and until is None:
            self._always.remove(documentId)
        elif since is None:
            self._remove_delete(self._until_only, until, documentId)
        elif until is None:
            self._remove_delete(self._since_only, since, documentId)
        else:
            self._remove_delete(self._since, since, documentId)
            self._remove_delete(self._until, until, documentId)

    def _convertDateTime(self, value):
        if value is None:
            return value
        if isinstance(value, (str, datetime)):
            dt_obj = DateTime(value)
            value = dt_obj.millis() / 1000 / 60  # flatten to minutes
        elif isinstance(value, DateTime):
            value = value.millis() / 1000 / 60  # flatten to minutes
        if value > MAX32 or value < -MAX32:
            # t_val must be integer fitting in the 32bit range
            raise OverflowError('%s is not within the range of dates allowed'
                                'by a DateRangeIndex' % value)
        value = int(value)
        # handle values outside our specified range
        if value > self.ceiling_value:
            return None
        elif value < self.floor_value:
            return None
        return value
Пример #16
0
    def update(self, filter, changes):
        rids = self._select_rids(self.tuplify(filter))
        if rids is None:
            rids = self.data.keys()
        elif not rids:
            # Nothing needs to be updated.
            return 0
        count = len(rids)

        # Identify changes.
        old_data = {}  # rid -> old tuple
        new_data = {}  # rid -> new tuple
        old_to_new = {}  # old primary key -> new primary key
        new_to_rid = {}  # new primary key -> rid

        record = self.tuplify(changes)
        for rid in rids:
            old_r = self.data[rid]
            old_data[rid] = old_r
            new_r = list(old_r)
            # new_r and old_r contain record tuples.
            for position, column in self.col_info:
                if record[position] is not None:
                    new_r[position] = record[position]
            new_data[rid] = tuple(new_r)
            # Hmm.  The code below allows an update to change the primary
            # key.  It might be better to prevent primary key columns from
            # being changed by an update() call.
            opk = []
            npk = []
            for position, column in self.col_info:
                if column.primary:
                    opk.append(old_r[position])
                    npk.append(new_r[position])
            if opk != npk:
                opk = tuple(opk)
                npk = tuple(npk)
                old_to_new[opk] = npk
                new_to_rid[npk] = rid

        # Look for primary key conflicts.  A primary key conflict can
        # occur when changing a record to a different primary key and
        # the new primary key is already in use.
        for pk in old_to_new.values():
            if (self.primary_index.has_key(pk) and not old_to_new.has_key(pk)):
                raise DuplicateError("Primary key %s in use" % repr(pk))

        # Update the data.
        self.data.update(new_data)

        # Remove old primary key indexes and insert new primary key indexes.
        for pk in old_to_new.keys():
            del self.primary_index[pk]
        self.primary_index.update(new_to_rid)

        # Update indexes.
        for rid, old_r in old_data.items():
            for position, column in self.col_info:
                index = self.indexes.get(column.name)
                if index is not None:
                    new_value = record[position]
                    old_value = old_r[position]
                    if new_value != old_value:
                        if old_value is not None and index.has_key(old_value):
                            # Remove an index entry.
                            set = index[old_value]
                            set.remove(rid)
                            if not set:
                                del index[old_value]
                        if new_value is not None:
                            # Add an index entry.
                            set = index.get(new_value)
                            if set is None:
                                set = IITreeSet()
                                index[new_value] = set
                            set.insert(rid)

        # Return the number of rows affected.
        return count
Пример #17
0
class DateRangeIndex(UnIndex):
    """Index for date ranges, such as the "effective-expiration" range in CMF.

    Any object may return None for either the start or the end date: for the
    start date, this should be the logical equivalent of "since the beginning
    of time"; for the end date, "until the end of time".

    Therefore, divide the space of indexed objects into four containers:

    - Objects which always match (i.e., they returned None for both);

    - Objects which match after a given time (i.e., they returned None for the
      end date);

    - Objects which match until a given time (i.e., they returned None for the
      start date);

    - Objects which match only during a specific interval.
    """

    security = ClassSecurityInfo()

    meta_type = 'DateRangeIndex'
    query_options = ('query', )

    manage_options = ({
        'label': 'Properties',
        'action': 'manage_indexProperties'
    }, )

    since_field = until_field = None

    # int(DateTime('1000/1/1 0:00 GMT-12').millis() / 1000 / 60)
    floor_value = -510162480
    # int(DateTime('2499/12/31 0:00 GMT+12').millis() / 1000 / 60)
    ceiling_value = 278751600
    # precision of indexed time interval in minutes
    precision_value = 1

    def __init__(self,
                 id,
                 since_field=None,
                 until_field=None,
                 caller=None,
                 extra=None,
                 floor_value=None,
                 ceiling_value=None,
                 precision_value=None):

        if extra:
            since_field = extra.since_field
            until_field = extra.until_field
            floor_value = getattr(extra, 'floor_value', None)
            ceiling_value = getattr(extra, 'ceiling_value', None)
            precision_value = getattr(extra, 'precision_value', None)

        self._setId(id)
        self._edit(since_field, until_field, floor_value, ceiling_value,
                   precision_value)
        self.clear()

    @security.protected(view)
    def getSinceField(self):
        """Get the name of the attribute indexed as start date.
        """
        return self._since_field

    @security.protected(view)
    def getUntilField(self):
        """Get the name of the attribute indexed as end date.
        """
        return self._until_field

    @security.protected(view)
    def getFloorValue(self):
        """ """
        return self.floor_value

    @security.protected(view)
    def getCeilingValue(self):
        """ """
        return self.ceiling_value

    @security.protected(view)
    def getPrecisionValue(self):
        """ """
        return self.precision_value

    manage_indexProperties = DTMLFile('manageDateRangeIndex', _dtmldir)

    @security.protected(manage_zcatalog_indexes)
    def manage_edit(self, since_field, until_field, floor_value, ceiling_value,
                    precision_value, REQUEST):
        """ """
        self._edit(since_field, until_field, floor_value, ceiling_value,
                   precision_value)
        REQUEST['RESPONSE'].redirect('{0}/manage_main'
                                     '?manage_tabs_message=Updated'.format(
                                         REQUEST.get('URL2')))

    @security.private
    def _edit(self,
              since_field,
              until_field,
              floor_value=None,
              ceiling_value=None,
              precision_value=None):
        """Update the fields used to compute the range.
        """
        self._since_field = since_field
        self._until_field = until_field
        if floor_value not in (None, ''):
            self.floor_value = int(floor_value)
        if ceiling_value not in (None, ''):
            self.ceiling_value = int(ceiling_value)
        if precision_value not in (None, ''):
            self.precision_value = int(precision_value)

    @security.protected(manage_zcatalog_indexes)
    def clear(self):
        """Start over fresh."""
        self._always = IITreeSet()
        self._since_only = IOBTree()
        self._until_only = IOBTree()
        self._since = IOBTree()
        self._until = IOBTree()
        self._unindex = IOBTree()  # 'datum' will be a tuple of date ints
        self._length = Length()
        if self._counter is None:
            self._counter = Length()
        else:
            self._increment_counter()

    def getEntryForObject(self, documentId, default=None):
        """Get all information contained for the specific object
        identified by 'documentId'.  Return 'default' if not found.
        """
        return self._unindex.get(documentId, default)

    def index_object(self, documentId, obj, threshold=None):
        """Index an object:
        - 'documentId' is the integer ID of the document
        - 'obj' is the object to be indexed
        - ignore threshold
        """
        if self._since_field is None:
            return 0

        since = getattr(obj, self._since_field, None)
        if safe_callable(since):
            since = since()
        since = self._convertDateTime(since)

        until = getattr(obj, self._until_field, None)
        if safe_callable(until):
            until = until()
        until = self._convertDateTime(until)

        datum = (since, until)

        old_datum = self._unindex.get(documentId, None)
        if datum == old_datum:  # No change?  bail out!
            return 0

        self._increment_counter()

        if old_datum is not None:
            old_since, old_until = old_datum
            self._removeForwardIndexEntry(old_since, old_until, documentId)

        self._insertForwardIndexEntry(since, until, documentId)
        self._unindex[documentId] = datum

        return 1

    def unindex_object(self, documentId):
        """Remove the object corresponding to 'documentId' from the index.
        """

        datum = self._unindex.get(documentId, None)
        if datum is None:
            return

        self._increment_counter()

        since, until = datum
        self._removeForwardIndexEntry(since, until, documentId)
        del self._unindex[documentId]

    def uniqueValues(self, name=None, withLengths=0):
        """Return a sequence of unique values for 'name'.

        If 'withLengths' is true, return a sequence of tuples, in
        the form '(value, length)'.
        """
        if name not in (self._since_field, self._until_field):
            return

        if name == self._since_field:
            sets = (self._since, self._since_only)
        else:
            sets = (self._until, self._until_only)

        if not withLengths:
            for s in sets:
                for key in s.keys():
                    yield key
        else:
            for s in sets:
                for key, value in s.items():
                    if isinstance(value, int):
                        yield (key, 1)
                    else:
                        yield (key, len(value))

    def getRequestCacheKey(self, record, resultset=None):
        term = self._convertDateTime(record.keys[0])
        tid = str(term)

        # unique index identifier
        iid = '_{0}_{1}_{2}'.format(self.__class__.__name__, self.id,
                                    self.getCounter())
        # record identifier
        if resultset is None:
            rid = '_{0}'.format(tid)
        else:
            rid = '_inverse_{0}'.format(tid)

        return (iid, rid)

    def _apply_index(self, request, resultset=None):
        record = IndexQuery(request, self.id, self.query_options,
                            self.operators, self.useOperator)
        if record.keys is None:
            return None
        return (self.query_index(record, resultset=resultset),
                (self._since_field, self._until_field))

    def query_index(self, record, resultset=None):
        cache = self.getRequestCache()
        if cache is not None:
            cachekey = self.getRequestCacheKey(record, resultset)
            cached = cache.get(cachekey, None)
            if cached is not None:
                if resultset is None:
                    return cached
                else:
                    return difference(resultset, cached)

        term = self._convertDateTime(record.keys[0])
        if resultset is None:
            # Aggregate sets for each bucket separately, to avoid
            # large-small union penalties.
            until_only = multiunion(self._until_only.values(term))
            since_only = multiunion(self._since_only.values(None, term))
            until = multiunion(self._until.values(term))
            since = multiunion(self._since.values(None, term))
            bounded = intersection(until, since)

            # Merge from smallest to largest.
            result = multiunion(
                [bounded, until_only, since_only, self._always])
            if cache is not None:
                cache[cachekey] = result

            return result
        else:
            # Compute the inverse and subtract from res
            until_only = multiunion(self._until_only.values(None, term - 1))
            since_only = multiunion(self._since_only.values(term + 1))
            until = multiunion(self._until.values(None, term - 1))
            since = multiunion(self._since.values(term + 1))

            result = multiunion([since, since_only, until_only, until])
            if cache is not None:
                cache[cachekey] = result

            return difference(resultset, result)

    def _insert_migrate(self, tree, key, value):
        treeset = tree.get(key, None)
        if treeset is None:
            tree[key] = IITreeSet((value, ))
        else:
            if isinstance(treeset, IITreeSet):
                treeset.insert(value)
            elif isinstance(treeset, int):
                tree[key] = IITreeSet((treeset, value))
            else:
                tree[key] = IITreeSet(treeset)
                tree[key].insert(value)

    def _insertForwardIndexEntry(self, since, until, documentId):
        """Insert 'documentId' into the appropriate set based on 'datum'.
        """
        if since is None and until is None:
            self._always.insert(documentId)
        elif since is None:
            self._insert_migrate(self._until_only, until, documentId)
        elif until is None:
            self._insert_migrate(self._since_only, since, documentId)
        else:
            self._insert_migrate(self._since, since, documentId)
            self._insert_migrate(self._until, until, documentId)

    def _remove_delete(self, tree, key, value):
        treeset = tree.get(key, None)
        if treeset is not None:
            if isinstance(treeset, int):
                del tree[key]
            else:
                treeset.remove(value)
                if not treeset:
                    del tree[key]

    def _removeForwardIndexEntry(self, since, until, documentId):
        """Remove 'documentId' from the appropriate set based on 'datum'.
        """
        if since is None and until is None:
            self._always.remove(documentId)
        elif since is None:
            self._remove_delete(self._until_only, until, documentId)
        elif until is None:
            self._remove_delete(self._since_only, since, documentId)
        else:
            self._remove_delete(self._since, since, documentId)
            self._remove_delete(self._until, until, documentId)

    def _convertDateTime(self, value):
        value = datetime_to_minutes(value, self.precision_value)

        if value is None:
            return None

        if (value > self.ceiling_value or value < self.floor_value):
            # handle values outside our specified range
            return None

        return value
Пример #18
0
class DateRangeIndex(UnIndex):

    """Index for date ranges, such as the "effective-expiration" range in CMF.

    Any object may return None for either the start or the end date: for the
    start date, this should be the logical equivalent of "since the beginning
    of time"; for the end date, "until the end of time".

    Therefore, divide the space of indexed objects into four containers:

    - Objects which always match (i.e., they returned None for both);

    - Objects which match after a given time (i.e., they returned None for the
      end date);

    - Objects which match until a given time (i.e., they returned None for the
      start date);

    - Objects which match only during a specific interval.
    """

    implements(IDateRangeIndex)

    security = ClassSecurityInfo()

    meta_type = "DateRangeIndex"
    query_options = ("query",)

    manage_options = ({"label": "Properties", "action": "manage_indexProperties"},)

    since_field = until_field = None

    # int(DateTime('1000/1/1 0:00 GMT-12').millis() / 1000 / 60)
    floor_value = -510162480
    # int(DateTime('2499/12/31 0:00 GMT+12').millis() / 1000 / 60)
    ceiling_value = 278751600

    def __init__(
        self, id, since_field=None, until_field=None, caller=None, extra=None, floor_value=None, ceiling_value=None
    ):

        if extra:
            since_field = extra.since_field
            until_field = extra.until_field
            floor_value = getattr(extra, "floor_value", None)
            ceiling_value = getattr(extra, "ceiling_value", None)

        self._setId(id)
        self._edit(since_field, until_field, floor_value, ceiling_value)
        self.clear()

    security.declareProtected(view, "getSinceField")

    def getSinceField(self):
        """Get the name of the attribute indexed as start date.
        """
        return self._since_field

    security.declareProtected(view, "getUntilField")

    def getUntilField(self):
        """Get the name of the attribute indexed as end date.
        """
        return self._until_field

    security.declareProtected(view, "getFloorValue")

    def getFloorValue(self):
        """"""
        return self.floor_value

    security.declareProtected(view, "getCeilingValue")

    def getCeilingValue(self):
        """"""
        return self.ceiling_value

    manage_indexProperties = DTMLFile("manageDateRangeIndex", _dtmldir)

    security.declareProtected(manage_zcatalog_indexes, "manage_edit")

    def manage_edit(self, since_field, until_field, floor_value, ceiling_value, REQUEST):
        """"""
        self._edit(since_field, until_field, floor_value, ceiling_value)
        REQUEST["RESPONSE"].redirect("%s/manage_main" "?manage_tabs_message=Updated" % REQUEST.get("URL2"))

    security.declarePrivate("_edit")

    def _edit(self, since_field, until_field, floor_value=None, ceiling_value=None):
        """Update the fields used to compute the range.
        """
        self._since_field = since_field
        self._until_field = until_field
        if floor_value is not None:
            self.floor_value = int(floor_value)
        if ceiling_value is not None:
            self.ceiling_value = int(ceiling_value)

    security.declareProtected(manage_zcatalog_indexes, "clear")

    def clear(self):
        """Start over fresh."""
        self._always = IITreeSet()
        self._since_only = IOBTree()
        self._until_only = IOBTree()
        self._since = IOBTree()
        self._until = IOBTree()
        self._unindex = IOBTree()  # 'datum' will be a tuple of date ints
        self._length = Length()

    def getEntryForObject(self, documentId, default=None):
        """Get all information contained for the specific object
        identified by 'documentId'.  Return 'default' if not found.
        """
        return self._unindex.get(documentId, default)

    def index_object(self, documentId, obj, threshold=None):
        """Index an object:
        - 'documentId' is the integer ID of the document
        - 'obj' is the object to be indexed
        - ignore threshold
        """
        if self._since_field is None:
            return 0

        since = getattr(obj, self._since_field, None)
        if safe_callable(since):
            since = since()
        since = self._convertDateTime(since)

        until = getattr(obj, self._until_field, None)
        if safe_callable(until):
            until = until()
        until = self._convertDateTime(until)

        datum = (since, until)

        old_datum = self._unindex.get(documentId, None)
        if datum == old_datum:  # No change?  bail out!
            return 0

        if old_datum is not None:
            old_since, old_until = old_datum
            self._removeForwardIndexEntry(old_since, old_until, documentId)

        self._insertForwardIndexEntry(since, until, documentId)
        self._unindex[documentId] = datum

        return 1

    def unindex_object(self, documentId):
        """Remove the object corresponding to 'documentId' from the index.
        """
        datum = self._unindex.get(documentId, None)
        if datum is None:
            return

        since, until = datum
        self._removeForwardIndexEntry(since, until, documentId)
        del self._unindex[documentId]

    def uniqueValues(self, name=None, withLengths=0):
        """Return a list of unique values for 'name'.

        If 'withLengths' is true, return a sequence of tuples, in
        the form '(value, length)'.
        """
        if not name in (self._since_field, self._until_field):
            return []

        if name == self._since_field:
            t1 = self._since
            t2 = self._since_only
        else:
            t1 = self._until
            t2 = self._until_only

        result = []
        if not withLengths:
            result.extend(t1.keys())
            result.extend(t2.keys())
        else:
            for key in t1.keys():
                set = t1[key]
                if isinstance(set, int):
                    length = 1
                else:
                    length = len(set)
                result.append((key, length))
            for key in t2.keys():
                set = t2[key]
                if isinstance(set, int):
                    length = 1
                else:
                    length = len(set)
                result.append((key, length))
        return tuple(result)

    def _cache_key(self, catalog):
        cid = catalog.getId()
        counter = getattr(aq_base(catalog), "getCounter", None)
        if counter is not None:
            return "%s_%s" % (cid, counter())
        return cid

    def _apply_index(self, request, resultset=None):
        """Apply the index to query parameters given in 'request', which
        should be a mapping object.

        If the request does not contain the needed parameters, then
        return None.

        Otherwise return two objects.  The first object is a ResultSet
        containing the record numbers of the matching records.  The
        second object is a tuple containing the names of all data fields
        used.
        """
        iid = self.id
        record = parseIndexRequest(request, iid, self.query_options)
        if record.keys is None:
            return None

        term = self._convertDateTime(record.keys[0])
        REQUEST = aq_get(self, "REQUEST", None)
        if REQUEST is not None:
            catalog = aq_parent(aq_parent(aq_inner(self)))
            if catalog is not None:
                key = self._cache_key(catalog)
                cache = REQUEST.get(key, None)
                tid = isinstance(term, int) and term / 10 or "None"
                if resultset is None:
                    cachekey = "_daterangeindex_%s_%s" % (iid, tid)
                else:
                    cachekey = "_daterangeindex_inverse_%s_%s" % (iid, tid)
                if cache is None:
                    cache = REQUEST[key] = RequestCache()
                else:
                    cached = cache.get(cachekey, None)
                    if cached is not None:
                        if resultset is None:
                            return (cached, (self._since_field, self._until_field))
                        else:
                            return (difference(resultset, cached), (self._since_field, self._until_field))

        if resultset is None:
            # Aggregate sets for each bucket separately, to avoid
            # large-small union penalties.
            until_only = multiunion(self._until_only.values(term))
            since_only = multiunion(self._since_only.values(None, term))
            until = multiunion(self._until.values(term))

            # Total result is bound by resultset
            if REQUEST is None:
                until = intersection(resultset, until)

            since = multiunion(self._since.values(None, term))
            bounded = intersection(until, since)

            # Merge from smallest to largest.
            result = multiunion([bounded, until_only, since_only, self._always])
            if REQUEST is not None and catalog is not None:
                cache[cachekey] = result

            return (result, (self._since_field, self._until_field))
        else:
            # Compute the inverse and subtract from res
            until_only = multiunion(self._until_only.values(None, term - 1))
            since_only = multiunion(self._since_only.values(term + 1))
            until = multiunion(self._until.values(None, term - 1))
            since = multiunion(self._since.values(term + 1))

            result = multiunion([since, since_only, until_only, until])
            if REQUEST is not None and catalog is not None:
                cache[cachekey] = result

            return (difference(resultset, result), (self._since_field, self._until_field))

    def _insert_migrate(self, tree, key, value):
        treeset = tree.get(key, None)
        if treeset is None:
            tree[key] = IITreeSet((value,))
        else:
            if isinstance(treeset, IITreeSet):
                treeset.insert(value)
            elif isinstance(treeset, int):
                tree[key] = IITreeSet((treeset, value))
            else:
                tree[key] = IITreeSet(treeset)
                tree[key].insert(value)

    def _insertForwardIndexEntry(self, since, until, documentId):
        """Insert 'documentId' into the appropriate set based on 'datum'.
        """
        if since is None and until is None:
            self._always.insert(documentId)
        elif since is None:
            self._insert_migrate(self._until_only, until, documentId)
        elif until is None:
            self._insert_migrate(self._since_only, since, documentId)
        else:
            self._insert_migrate(self._since, since, documentId)
            self._insert_migrate(self._until, until, documentId)

    def _remove_delete(self, tree, key, value):
        treeset = tree.get(key, None)
        if treeset is not None:
            if isinstance(treeset, int):
                del tree[key]
            else:
                treeset.remove(value)
                if not treeset:
                    del tree[key]

    def _removeForwardIndexEntry(self, since, until, documentId):
        """Remove 'documentId' from the appropriate set based on 'datum'.
        """
        if since is None and until is None:
            self._always.remove(documentId)
        elif since is None:
            self._remove_delete(self._until_only, until, documentId)
        elif until is None:
            self._remove_delete(self._since_only, since, documentId)
        else:
            self._remove_delete(self._since, since, documentId)
            self._remove_delete(self._until, until, documentId)

    def _convertDateTime(self, value):
        if value is None:
            return value
        if isinstance(value, (str, datetime)):
            dt_obj = DateTime(value)
            value = dt_obj.millis() / 1000 / 60  # flatten to minutes
        elif isinstance(value, DateTime):
            value = value.millis() / 1000 / 60  # flatten to minutes
        if value > MAX32 or value < -MAX32:
            # t_val must be integer fitting in the 32bit range
            raise OverflowError("%s is not within the range of dates allowed" "by a DateRangeIndex" % value)
        value = int(value)
        # handle values outside our specified range
        if value > self.ceiling_value:
            return None
        elif value < self.floor_value:
            return None
        return value
Пример #19
0
    def index_object(self, documentId, obj, threshold=None):
        """Index an object.

        - ``documentId`` is the integer ID of the document.

        - ``obj`` is the object to be indexed.

        - ``threshold`` is the number of words to process between committing
          subtransactions.  If None, subtransactions are disabled.

        For each name in ``getIndexSourceNames``, try to get the named
        attribute from ``obj``.

        - If the object does not have the attribute, do not add it to the
          index for that name.

        - If the attribute is a callable, call it to get the value.  If
          calling it raises an AttributeError, do not add it to the index.
          for that name.
        """
        # Clear the data structures before indexing the object. This will ensure
        # we don't leave any stale data behind when an object gets reindexed.
        self.unindex_object(documentId)

        ### 1. Get the values.
        start = self._getattr(self.start_attr, obj)
        end = self._getattr(self.end_attr, obj)
        if start is None:
            # Ignore calls if the obj does not have the start field.
            return False

        if end is None:
            # Singular event
            end = start

        recurrence = self._getattr(self.recurrence_attr, obj)
        if not recurrence:
            rule = None
        elif isinstance(recurrence, basestring):
            # XXX trap and log errors
            rule = rrule.rrulestr(recurrence, dtstart=start)
        elif isinstance(recurrence, rrule.rrulebase):
            rule = recurrence
        else:
            #XXX Log error
            rule = None

        # Strip out times from the recurrence:
        if rule is not None:
            sync_timezone(rule, start.tzinfo)

        ### 2. Make them into what should be indexed.
        # XXX Naive events are not comparable to timezoned events, so we convert
        # everything to utctimetuple(). This means naive events are assumed to
        # be GMT, but we can live with that at the moment.
        start_value = start.utctimetuple()
        end_value = end.utctimetuple()

        # The end value should be the end of the recurrence, if any:
        if rule is not None:
            if is_open_ended(rule):
                # This recurrence is open ended
                end_value = None
            else:
                duration = end - start
                allrecs = [x for x in rule._iter()]
                if allrecs:
                    last = allrecs[-1] + duration
                else:
                    # Real data may have invalud recurrence rules,
                    # which end before the start for example.
                    # Then we end up here.
                    last = end
                end_value = last.utctimetuple()

        ### 3. Store everything in the indexes:
        row = self._start2uid.get(start_value, None)
        if row is None:
            row = IITreeSet((documentId,))
            self._start2uid[start_value] = row
        else:
            row.insert(documentId)

        row = self._end2uid.get(end_value, None)
        if row is None:
            row = IITreeSet((documentId,))
            self._end2uid[end_value] = row
        else:
            row.insert(documentId)

        self._uid2start[documentId] = start_value
        self._uid2recurrence[documentId] = rule
        self._uid2end[documentId] = end_value
        self._uid2duration[documentId] = end - start

        return True