Пример #1
0
class database:
    """Minimal key/value store.

    ``self.data`` is a list holding the stored values and ``self.dict``
    (an ``IIBTree``) maps each key to the position of its value inside
    ``self.data``.  ``self.index`` tracks the position of the most
    recently appended value.

    NOTE(review): an ``IIBTree`` presumably restricts keys to integers;
    confirm against the callers.
    """

    def __init__(self):
        self.data = []
        self.dict = IIBTree()
        self.index = -1  # index of the last element in the list

    def insert(self, key, value):
        """Store ``value`` under ``key``, overwriting any previous value."""
        slot = self.dict.get(key)
        if slot is None:
            # Unknown key: append the value and remember the slot it occupies.
            self.data.append(value)
            self.index += 1
            self.dict[key] = self.index
        else:
            # Known key: overwrite the value in its existing slot.
            self.data[slot] = value

    def search(self, key):
        """Return the value stored under ``key``.

        Returns the string ``"NOT PRESENT"`` when the key is unknown.
        """
        slot = self.dict.get(key)
        if slot is None:
            return "NOT PRESENT"
        return self.data[slot]

    def delete(self, key):
        """Remove ``key`` from the store.

        Raises ``KeyError`` (from ``pop``) when the key is unknown.
        """
        slot = self.dict.pop(key)
        # Drop the reference to the deleted value so it can be reclaimed.
        # The slot itself is kept so the positions recorded for the other
        # keys stay valid.
        self.data[slot] = None
Пример #2
0
class BooleanIndex(UnIndex):
    """Index for booleans

       self._index = set([documentId1, documentId2])
       self._unindex = {documentId:[True/False]}

       self._length is the length of the unindex
       self._index_length is the length of the index
       self._index_value is the value whose documents are kept in _index

       Documents holding the opposite of self._index_value have no entry
       in _index; they are derived from _unindex when needed.
    """

    meta_type = "BooleanIndex"

    manage_options = (
        {'label': 'Settings',
         'action': 'manage_main'},
        {'label': 'Browse',
         'action': 'manage_browse'},
    )

    query_options = ["query"]

    manage = manage_main = DTMLFile('dtml/manageBooleanIndex', globals())
    manage_main._setName('manage_main')
    manage_browse = DTMLFile('../dtml/browseIndex', globals())

    _index_value = 1
    _index_length = None

    def clear(self):
        """Reset the index to an empty state."""
        self._index = IITreeSet()
        self._index_length = BTrees.Length.Length()
        self._index_value = 1
        self._unindex = IIBTree()
        self._length = BTrees.Length.Length()
        # NOTE(review): _counter / _increment_counter are not defined in
        # this class; presumably inherited from UnIndex.
        if self._counter is None:
            self._counter = BTrees.Length.Length()
        else:
            self._increment_counter()

    def histogram(self):
        """Return a mapping which provides a histogram of the number of
        elements found at each point in the index.
        """
        histogram = {}
        indexed = bool(self._index_value)
        histogram[indexed] = self._index_length.value
        # Everything that is not in the forward index has the other value.
        histogram[not indexed] = self._length.value - self._index_length.value
        return histogram

    def _invert_index(self, documentId=None):
        """Flip the indexed value and rebuild _index from _unindex.

        documentId, when given, is removed again from the rebuilt index.
        """
        self._index_value = indexed = int(not self._index_value)
        self._index.clear()
        length = 0
        for rid, value in self._unindex.iteritems():
            if value == indexed:
                self._index.add(rid)
                length += 1
        # documentId is the rid of the currently processed object that
        # triggered the invert. in the case of unindexing, the rid hasn't
        # been removed from the unindex yet. While indexing, the rid will
        # be added to the index and unindex after this method is done
        if documentId is not None:
            self._index.remove(documentId)
            length -= 1
        self._index_length = BTrees.Length.Length(length)

    def insertForwardIndexEntry(self, entry, documentId):
        """If the value matches the indexed one, insert into treeset
        """
        # When we get the first entry, decide to index the opposite of what
        # we got, as indexing zero items is fewer than one.
        if self._length.value == 0:
            self._index_value = int(not bool(entry))

        # if the added entry value is index value, insert it into index
        matches_indexed = bool(entry) is bool(self._index_value)
        if matches_indexed:
            self._index_length.change(1)
            self._index.insert(documentId)

        # insert value into global unindex (before computing index invert).
        # _length mirrors the size of _unindex, so it only grows when the
        # document had no entry yet; a value change of an already indexed
        # document must not be counted twice.
        is_new_document = documentId not in self._unindex
        self._unindex[documentId] = entry
        if is_new_document:
            self._length.change(1)

        # is the index (after adding the current entry) larger than 60%
        # of the total length? then switch the indexed value
        if matches_indexed:
            if self._index_length.value >= self._length.value * 0.6:
                self._invert_index()

    def removeForwardIndexEntry(self, entry, documentId, check=True):
        """Take the entry provided and remove any reference to documentId
        in its entry in the index.

        When the entry is not the indexed value and ``check`` is true, the
        index may invert itself instead.
        """
        if bool(entry) is bool(self._index_value):
            try:
                self._index.remove(documentId)
                self._index_length.change(-1)
            except ConflictError:
                raise
            except Exception:
                LOG.exception(
                    '%s: unindex_object could not remove documentId %s '
                    'from index %s. This should not happen.',
                    self.__class__.__name__,
                    str(documentId),
                    str(self.id))
        elif check:
            # is the index (after removing the current entry) larger than
            # 60% of the total length? then switch the indexed value
            # NOTE(review): the comment says "larger than" but the
            # comparison below is "<="; confirm the intended direction.
            if (self._index_length.value) <= ((self._length.value - 1) * 0.6):
                self._invert_index(documentId)
                return

    def _index_object(self, documentId, obj, threshold=None, attr=''):
        """Index an object 'obj' with integer id 'documentId'.

        Returns 1 when the stored value changed, 0 otherwise.  The
        ``threshold`` argument is not used here.
        """
        returnStatus = 0

        # First we need to see if there's anything interesting to look at
        datum = self._get_object_datum(obj, attr)

        # Make it boolean, int as an optimization
        if datum is not _marker:
            datum = int(bool(datum))

        # We don't want to do anything that we don't have to here, so we'll
        # check to see if the new and existing information is the same.
        oldDatum = self._unindex.get(documentId, _marker)
        if datum != oldDatum:
            if oldDatum is not _marker:
                self.removeForwardIndexEntry(oldDatum, documentId, check=False)
                if datum is _marker:
                    try:
                        del self._unindex[documentId]
                        self._length.change(-1)
                    except ConflictError:
                        raise
                    except Exception:
                        LOG.error('Should not happen: oldDatum was there, now '
                                  'its not, for document with id %s',
                                  documentId)

            if datum is not _marker:
                self.insertForwardIndexEntry(datum, documentId)

            returnStatus = 1

        return returnStatus

    def unindex_object(self, documentId):
        """ Unindex the object with integer id 'documentId' and don't
        raise an exception if we fail
        """
        unindexRecord = self._unindex.get(documentId, _marker)
        if unindexRecord is _marker:
            return None

        self._increment_counter()

        self.removeForwardIndexEntry(unindexRecord, documentId)

        try:
            del self._unindex[documentId]
            self._length.change(-1)
        except ConflictError:
            raise
        except Exception:
            LOG.debug('Attempt to unindex nonexistent document'
                      ' with id %s', documentId, exc_info=True)

    def query_index(self, record, resultset=None):
        """Return the rids matching the first key of ``record.keys``.

        Only the first key is considered: every branch of the loop body
        returns.  An empty IISet is returned when there are no keys.
        """
        index = self._index
        indexed = self._index_value

        for key in record.keys:
            if bool(key) is bool(indexed):
                # If we match the indexed value, check index
                return intersection(index, resultset)
            # Otherwise, remove from resultset or _unindex
            if resultset is None:
                return union(difference(self._unindex, index), IISet([]))
            # Restrict to rids this index actually knows about; a rid in
            # resultset without an unindex entry has no value at all and
            # must not be reported as holding the non-indexed value.
            return intersection(difference(resultset, index), self._unindex)
        return IISet()

    def indexSize(self):
        """Return distinct values, as an optimization we always claim 2."""
        return 2

    def items(self):
        """Return a list of (value, set of rids) tuples for both values."""
        indexed = self._index_value
        items = [(bool(indexed), self._index)]
        # The rids for the non-indexed value are collected from _unindex.
        false = IISet()
        for rid, value in self._unindex.iteritems():
            if value != indexed:
                false.add(rid)
        items.append((not bool(indexed), false))
        return items
Пример #3
0
class DateIndex(UnIndex, PropertyManager):

    """Index for dates.

    Indexed values are normalized by ``_convert`` to an integer with a
    granularity of one minute.
    """
    implements(IDateIndex)

    meta_type = 'DateIndex'
    query_options = ('query', 'range')

    index_naive_time_as_local = True # False means index as UTC
    _properties=({'id':'index_naive_time_as_local',
                  'type':'boolean',
                  'mode':'w'},)

    manage = manage_main = DTMLFile( 'dtml/manageDateIndex', globals() )
    manage_browse = DTMLFile('../dtml/browseIndex', globals())

    manage_main._setName( 'manage_main' )
    manage_options = ( { 'label' : 'Settings'
                       , 'action' : 'manage_main'
                       },
                       {'label': 'Browse',
                        'action': 'manage_browse',
                       },
                     ) + PropertyManager.manage_options

    def clear( self ):
        """ Complete reset """
        self._index = IOBTree()
        self._unindex = IIBTree()
        self._length = Length()

    def index_object( self, documentId, obj, threshold=None ):
        """index an object, normalizing the indexed value to an integer

           o Normalized value has granularity of one minute.

           o Objects which have 'None' as indexed value are *omitted*,
             by design.

           Returns 1 when the stored value changed, 0 otherwise.  The
           'threshold' argument is not used here.
        """
        returnStatus = 0

        try:
            date_attr = getattr( obj, self.id )
            if safe_callable( date_attr ):
                date_attr = date_attr()

            ConvertedDate = self._convert( value=date_attr, default=_marker )
        except AttributeError:
            ConvertedDate = _marker

        oldConvertedDate = self._unindex.get( documentId, _marker )

        if ConvertedDate != oldConvertedDate:
            if oldConvertedDate is not _marker:
                self.removeForwardIndexEntry(oldConvertedDate, documentId)
                if ConvertedDate is _marker:
                    try:
                        del self._unindex[documentId]
                    except ConflictError:
                        raise
                    except Exception:
                        # Deliberately best-effort, but do not swallow
                        # KeyboardInterrupt / SystemExit as a bare
                        # "except:" would.
                        LOG.error("Should not happen: ConvertedDate was there,"
                                  " now it's not, for document with id %s" %
                                  documentId)

            if ConvertedDate is not _marker:
                self.insertForwardIndexEntry( ConvertedDate, documentId )
                self._unindex[documentId] = ConvertedDate

            returnStatus = 1

        return returnStatus

    def _apply_index(self, request, resultset=None):
        """Apply the index to query parameters given in the argument

        Normalize the 'query' arguments into integer values at minute
        precision before querying.

        Returns None when the request holds no keys for this index,
        otherwise a tuple of (result set, (self.id,)).
        """
        record = parseIndexRequest(request, self.id, self.query_options)
        if record.keys is None:
            return None

        # Materialize the converted keys: they may be consumed by both
        # min() and max() below, which a one-shot iterator cannot serve.
        keys = [self._convert(key) for key in record.keys]

        index = self._index
        r = None
        opr = None

        #experimental code for specifing the operator
        operator = record.get( 'operator', self.useOperator )
        if operator not in self.operators:
            raise RuntimeError("operator not valid: %s" % operator)

        # depending on the operator we use intersection or union
        if operator=="or":
            set_func = union
        else:
            set_func = intersection

        # range parameter
        range_arg = record.get('range',None)
        if range_arg:
            opr = "range"
            opr_args = []
            if "min" in range_arg:
                opr_args.append("min")
            if "max" in range_arg:
                opr_args.append("max")

        if record.get('usage',None):
            # see if any usage params are sent to field
            opr = record.usage.lower().split(':')
            opr, opr_args = opr[0], opr[1:]

        if opr=="range":   # range search
            if 'min' in opr_args:
                lo = min(keys)
            else:
                lo = None

            if 'max' in opr_args:
                hi = max(keys)
            else:
                hi = None

            if hi:
                setlist = index.values(lo,hi)
            else:
                setlist = index.values(lo)

            r = multiunion(setlist)

        else: # not a range search
            for key in keys:
                rids = index.get(key, None)
                if rids is not None:
                    if isinstance(rids, int):
                        # a single rid is stored as a plain integer
                        rids = IISet((rids,))
                    else:
                        # set can't be bigger than resultset
                        rids = intersection(rids, resultset)
                    r = set_func(r, rids)

        if isinstance(r, int):
            r = IISet((r,))

        if r is None:
            return IISet(), (self.id,)
        else:
            return r, (self.id,)

    def _convert( self, value, default=None ):
        """Convert Date/Time value to our internal representation

        Returns 'default' for values of an unsupported type (and for empty
        strings).  Raises OverflowError when the result exceeds MAX32.
        """
        # XXX: Code patched 20/May/2003 by Kiran Jonnalagadda to
        # convert dates to UTC first.
        if isinstance(value, DateTime):
            t_tup = value.toZone('UTC').parts()
        elif isinstance(value, (float, int)):
            t_tup = time.gmtime( value )
        elif isinstance(value, str) and value:
            t_obj = DateTime( value ).toZone('UTC')
            t_tup = t_obj.parts()
        elif isinstance(value, datetime):
            # checked before 'date' on purpose: datetime is a date subclass
            if self.index_naive_time_as_local and value.tzinfo is None:
                value = value.replace(tzinfo=Local)
            # else if tzinfo is None, naive time interpreted as UTC
            t_tup = value.utctimetuple()
        elif isinstance(value, date):
            t_tup = value.timetuple()
        else:
            return default

        yr = t_tup[0]
        mo = t_tup[1]
        dy = t_tup[2]
        hr = t_tup[3]
        mn = t_tup[4]

        # Pack year/month/day/hour/minute into one integer; seconds are
        # dropped, which gives the one minute granularity.
        t_val = ( ( ( ( yr * 12 + mo ) * 31 + dy ) * 24 + hr ) * 60 + mn )

        if t_val > MAX32:
            # t_val must be integer fitting in the 32bit range
            raise OverflowError(
                "%s is not within the range of indexable dates (index: %s)"
                % (value, self.id))

        return t_val
Пример #4
0
class BooleanIndex(UnIndex):
    """Index for booleans

       self._index = set([documentId1, documentId2])
       self._unindex = {documentId:[True/False]}

       self._length is the length of the unindex
       self._index_length is the length of the index
       self._index_value is the value whose documents are kept in _index

       Documents holding the opposite of self._index_value have no entry
       in _index; they are derived from _unindex when needed.
    """

    meta_type = "BooleanIndex"

    manage_options = (
        {
            'label': 'Settings',
            'action': 'manage_main'
        },
        {
            'label': 'Browse',
            'action': 'manage_browse'
        },
    )

    query_options = ["query"]

    manage = manage_main = DTMLFile('dtml/manageBooleanIndex', globals())
    manage_main._setName('manage_main')
    manage_browse = DTMLFile('../dtml/browseIndex', globals())

    _index_value = 1
    # None marks an instance that has not been migrated yet; see
    # _inline_migration.
    _index_length = None

    def clear(self):
        """Reset the index to an empty state."""
        self._index = IITreeSet()
        self._index_length = BTrees.Length.Length()
        self._index_value = 1
        self._unindex = IIBTree()
        self._length = BTrees.Length.Length()

    def histogram(self):
        """Return a mapping which provides a histogram of the number of
        elements found at each point in the index.
        """
        histogram = {}
        indexed = bool(self._index_value)
        histogram[indexed] = self._index_length.value
        # Everything that is not in the forward index has the other value.
        histogram[not indexed] = self._length.value - self._index_length.value
        return histogram

    def _invert_index(self, documentId=None):
        """Flip the indexed value and rebuild _index from _unindex.

        documentId, when given, is removed again from the rebuilt index.
        """
        self._index_value = indexed = int(not self._index_value)
        self._index.clear()
        length = 0
        for rid, value in self._unindex.iteritems():
            if value == indexed:
                self._index.add(rid)
                length += 1
        # documentId is the rid of the currently processed object that
        # triggered the invert. in the case of unindexing, the rid hasn't
        # been removed from the unindex yet. While indexing, the rid will
        # be added to the index and unindex after this method is done
        if documentId is not None:
            self._index.remove(documentId)
            length -= 1
        self._index_length = BTrees.Length.Length(length)

    def _inline_migration(self):
        """Recompute both length counters from the stored data and invert
        the index when it holds more than half of all documents.
        """
        self._length = BTrees.Length.Length(len(self._unindex.keys()))
        self._index_length = BTrees.Length.Length(len(self._index))
        if self._index_length.value > (self._length.value / 2):
            self._index_value = 1
            self._invert_index()
        else:
            # set an instance variable
            self._index_value = 1

    def insertForwardIndexEntry(self, entry, documentId):
        """If the value matches the indexed one, insert into treeset
        """
        # BBB inline migration
        if self._index_length is None:
            self._inline_migration()

        # when we get the first entry, decide to index the opposite of what
        # we got, as indexing zero items is fewer than one
        if self._length.value == 0:
            self._index_value = int(not bool(entry))

        # if the added entry value is index value, insert it into index
        matches_indexed = bool(entry) is bool(self._index_value)
        if matches_indexed:
            self._index_length.change(1)
            self._index.insert(documentId)

        # insert value into global unindex (before computing index invert).
        # _length mirrors the size of _unindex, so it only grows when the
        # document had no entry yet; a value change of an already indexed
        # document must not be counted twice.
        is_new_document = documentId not in self._unindex
        self._unindex[documentId] = entry
        if is_new_document:
            self._length.change(1)

        # is the index (after adding the current entry) larger than 60%
        # of the total length? then switch the indexed value
        if matches_indexed:
            if self._index_length.value >= self._length.value * 0.6:
                self._invert_index()

    def removeForwardIndexEntry(self, entry, documentId, check=True):
        """Take the entry provided and remove any reference to documentId
        in its entry in the index.

        When the entry is not the indexed value and ``check`` is true, the
        index may invert itself instead.
        """
        # BBB inline migration
        if self._index_length is None:
            self._inline_migration()

        if bool(entry) is bool(self._index_value):
            try:
                self._index.remove(documentId)
                self._index_length.change(-1)
            except ConflictError:
                raise
            except Exception:
                LOG.exception(
                    '%s: unindex_object could not remove '
                    'documentId %s from index %s. This '
                    'should not happen.',
                    self.__class__.__name__, str(documentId), str(self.id))
        elif check:
            length = self._length.value
            index_length = self._index_length.value
            # is the index (after removing the current entry) larger than
            # 60% of the total length? then switch the indexed value
            # NOTE(review): the comment says "larger than" but the
            # comparison below is "<="; confirm the intended direction.
            if (index_length) <= ((length - 1) * 0.6):
                self._invert_index(documentId)
                return

    def _index_object(self, documentId, obj, threshold=None, attr=''):
        """Index an object 'obj' with integer id 'documentId'.

        Returns 1 when the stored value changed, 0 otherwise.  The
        ``threshold`` argument is not used here.
        """
        returnStatus = 0

        # First we need to see if there's anything interesting to look at
        datum = self._get_object_datum(obj, attr)

        # Make it boolean, int as an optimization
        if datum is not _marker:
            datum = int(bool(datum))

        # We don't want to do anything that we don't have to here, so we'll
        # check to see if the new and existing information is the same.
        oldDatum = self._unindex.get(documentId, _marker)
        if datum != oldDatum:
            if oldDatum is not _marker:
                self.removeForwardIndexEntry(oldDatum, documentId, check=False)
                if datum is _marker:
                    try:
                        del self._unindex[documentId]
                        self._length.change(-1)
                    except ConflictError:
                        raise
                    except Exception:
                        LOG.error('Should not happen: oldDatum was there, now '
                                  'its not, for document with id %s',
                                  documentId)

            if datum is not _marker:
                self.insertForwardIndexEntry(datum, documentId)

            returnStatus = 1

        return returnStatus

    def unindex_object(self, documentId):
        """ Unindex the object with integer id 'documentId' and don't
        raise an exception if we fail
        """
        unindexRecord = self._unindex.get(documentId, _marker)
        if unindexRecord is _marker:
            return None

        self.removeForwardIndexEntry(unindexRecord, documentId)

        try:
            del self._unindex[documentId]
            self._length.change(-1)
        except ConflictError:
            raise
        except Exception:
            LOG.debug('Attempt to unindex nonexistent document'
                      ' with id %s', documentId,
                      exc_info=True)

    def _apply_index(self, request, resultset=None):
        """Return (matching rids, (self.id,)) for the first query key.

        Returns None when the request holds no keys for this index.  Only
        the first key is considered: every branch of the loop body returns.
        """
        record = parseIndexRequest(request, self.id, self.query_options)
        if record.keys is None:
            return None

        index = self._index
        indexed = self._index_value

        for key in record.keys:
            if bool(key) is bool(indexed):
                # If we match the indexed value, check index
                return (intersection(index, resultset), (self.id, ))
            # Otherwise, remove from resultset or _unindex
            if resultset is None:
                return (union(difference(self._unindex, index),
                              IISet([])), (self.id, ))
            # Restrict to rids this index actually knows about; a rid in
            # resultset without an unindex entry has no value at all and
            # must not be reported as holding the non-indexed value.
            return (intersection(difference(resultset, index),
                                 self._unindex), (self.id, ))
        return (IISet(), (self.id, ))

    def indexSize(self):
        """Return distinct values, as an optimization we always claim 2."""
        return 2

    def items(self):
        """Return a list of (value, set of rids) tuples for both values."""
        indexed = self._index_value
        items = [(bool(indexed), self._index)]
        # The rids for the non-indexed value are collected from _unindex.
        false = IISet()
        for rid, value in self._unindex.iteritems():
            if value != indexed:
                false.add(rid)
        items.append((not bool(indexed), false))
        return items
Пример #5
0
class DateIndex(UnIndex, PropertyManager):
    """Index for dates.

    Values are stored as integers produced by ``_convert``, which packs
    year, month, day, hour and minute into a single number.
    """

    meta_type = 'DateIndex'
    query_options = ('query', 'range', 'not')

    # When true, naive datetime values are treated as local time;
    # False means index as UTC.
    index_naive_time_as_local = True
    _properties = (
        {
            'id': 'index_naive_time_as_local',
            'type': 'boolean',
            'mode': 'w',
        },
    )

    manage = manage_main = DTMLFile('dtml/manageDateIndex', globals())
    manage_browse = DTMLFile('../dtml/browseIndex', globals())

    manage_main._setName('manage_main')
    manage_options = (
        {'label': 'Settings', 'action': 'manage_main'},
        {'label': 'Browse', 'action': 'manage_browse'},
    ) + PropertyManager.manage_options

    def clear(self):
        """Drop all indexed data and start over with empty structures."""
        self._index = IOBTree()
        self._unindex = IIBTree()
        self._length = Length()
        if self._counter is not None:
            self._increment_counter()
        else:
            self._counter = Length()

    def index_object(self, documentId, obj, threshold=None):
        """Index the date found on ``obj`` under ``documentId``.

           o The value is read from the attribute named after the index
             id and called first when it is callable.

           o The stored value has a granularity of one minute.

           o Objects whose value cannot be converted are left out of the
             index, by design.

           Returns 1 when the stored value changed, 0 otherwise.
        """
        try:
            raw_value = getattr(obj, self.id)
            if safe_callable(raw_value):
                raw_value = raw_value()
            new_value = self._convert(value=raw_value, default=_marker)
        except AttributeError:
            new_value = _marker

        previous = self._unindex.get(documentId, _marker)
        if new_value == previous:
            # Nothing to update, and the change counter stays untouched.
            return 0

        has_new_value = new_value is not _marker

        if previous is not _marker:
            self.removeForwardIndexEntry(previous, documentId)
            if not has_new_value:
                # The document lost its date: forget it completely.
                try:
                    del self._unindex[documentId]
                except ConflictError:
                    raise
                except Exception:
                    LOG.error("Should not happen: ConvertedDate was there,"
                              " now it's not, for document with id %s" %
                              documentId)

        if has_new_value:
            self.insertForwardIndexEntry(new_value, documentId)
            self._unindex[documentId] = new_value

        self._increment_counter()
        return 1

    def _convert(self, value, default=None):
        """Turn a date-like ``value`` into the integer stored in the index.

        Handles DateTime, numeric timestamps, non-empty strings, datetime
        and date values; anything else yields ``default``.  Raises
        OverflowError when the packed value exceeds MAX32.
        """
        if isinstance(value, DateTime):
            fields = value.toZone('UTC').parts()
        elif isinstance(value, (float, int)):
            fields = time.gmtime(value)
        elif isinstance(value, str) and value:
            fields = DateTime(value).toZone('UTC').parts()
        elif isinstance(value, datetime):
            # Must be tested before ``date``: datetime is a date subclass.
            if self.index_naive_time_as_local and value.tzinfo is None:
                value = value.replace(tzinfo=Local)
            # A naive value that was not localized is interpreted as UTC.
            fields = value.utctimetuple()
        elif isinstance(value, date):
            fields = value.timetuple()
        else:
            return default

        year, month, day, hour, minute = fields[:5]

        # Fold the five fields into one number, minute being the smallest
        # unit that is kept.
        packed = year * 12 + month
        packed = packed * 31 + day
        packed = packed * 24 + hour
        packed = packed * 60 + minute

        if packed <= MAX32:
            return packed

        # The packed value must be an integer fitting in the 32bit range.
        raise OverflowError(
            "%s is not within the range of indexable dates (index: %s)"
            % (value, self.id))
Пример #6
0
class BooleanIndex(UnIndex):
    """Index for booleans

       self._index = set([documentId1, documentId2])
       self._unindex = {documentId:[True/False]}

       self._length is the length of the unindex
       self._index_length is the length of the index
       self._index_value is the indexed value

       The document ids in self._index have self._index_value as their value.

       Since there are only two possible values (True/False), the index
       only stores a forward index for the less common value.
       It starts off with the opposite of value of the first document
       and later checks and inverts itself, if more than 60% of all
       documents now have the indexed value. It does the inversion
       at 60% to avoid inverting itself constantly for an index that
       has a roughly equal 50/50 split.
    """

    meta_type = 'BooleanIndex'

    manage_options = (
        {
            'label': 'Settings',
            'action': 'manage_main'
        },
        {
            'label': 'Browse',
            'action': 'manage_browse'
        },
    )

    query_options = ['query']

    manage = manage_main = DTMLFile('dtml/manageBooleanIndex', globals())
    manage_main._setName('manage_main')
    manage_browse = DTMLFile('../dtml/browseIndex', globals())

    _index_value = 1
    _index_length = None

    def clear(self):
        """Reset the index to an empty state."""
        self._index = IITreeSet()
        self._index_length = BTrees.Length.Length()
        self._index_value = 1
        self._unindex = IIBTree()
        self._length = BTrees.Length.Length()
        # NOTE(review): _counter / _increment_counter are not defined in
        # this class; presumably inherited from UnIndex.
        if self._counter is None:
            self._counter = BTrees.Length.Length()
        else:
            self._increment_counter()

    def histogram(self):
        """Return a mapping which provides a histogram of the number of
        elements found at each point in the index.
        """
        histogram = {}
        indexed = bool(self._index_value)
        histogram[indexed] = self._index_length.value
        # Everything that is not in the forward index has the other value.
        histogram[not indexed] = self._length.value - self._index_length.value
        return histogram

    def _invert_index(self, documentId=None):
        """Flip the indexed value and rebuild _index from _unindex.

        documentId, when given, is removed again from the rebuilt index.
        """
        self._index_value = indexed = int(not self._index_value)
        self._index.clear()
        length = 0
        for rid, value in self._unindex.iteritems():
            if value == indexed:
                self._index.add(rid)
                length += 1
        # documentId is the rid of the currently processed object that
        # triggered the invert. in the case of unindexing, the rid hasn't
        # been removed from the unindex yet. While indexing, the rid will
        # be added to the index and unindex after this method is done
        if documentId is not None:
            self._index.remove(documentId)
            length -= 1
        self._index_length = BTrees.Length.Length(length)

    def insertForwardIndexEntry(self, entry, documentId):
        """If the value matches the indexed one, insert into treeset
        """
        # When we get the first entry, decide to index the opposite of what
        # we got, as indexing zero items is fewer than one.
        if self._length.value == 0:
            self._index_value = int(not bool(entry))

        # if the added entry value is index value, insert it into index
        matches_indexed = bool(entry) is bool(self._index_value)
        if matches_indexed:
            self._index_length.change(1)
            self._index.insert(documentId)

        # insert value into global unindex (before computing index invert).
        # _length mirrors the size of _unindex, so it only grows when the
        # document had no entry yet; a value change of an already indexed
        # document must not be counted twice.
        is_new_document = documentId not in self._unindex
        self._unindex[documentId] = entry
        if is_new_document:
            self._length.change(1)

        # is the index (after adding the current entry) larger than 60%
        # of the total length? then switch the indexed value
        if matches_indexed:
            if self._index_length.value >= self._length.value * 0.6:
                self._invert_index()

    def removeForwardIndexEntry(self, entry, documentId, check=True):
        """Take the entry provided and remove any reference to documentId
        in its entry in the index.

        When the entry is not the indexed value and ``check`` is true, the
        index may invert itself instead.
        """
        if bool(entry) is bool(self._index_value):
            try:
                self._index.remove(documentId)
                self._index_length.change(-1)
            except ConflictError:
                raise
            except Exception:
                LOG.exception(
                    '%(context)s: unindex_object could not '
                    'remove documentId %(doc_id)s from '
                    'index %(index)r. This should not '
                    'happen.',
                    dict(context=self.__class__.__name__,
                         doc_id=documentId,
                         index=self.id))
        elif check:
            # is the index (after removing the current entry) larger than
            # 60% of the total length? then switch the indexed value
            # NOTE(review): the comment says "larger than" but the
            # comparison below is "<="; confirm the intended direction.
            if (self._index_length.value) <= ((self._length.value - 1) * 0.6):
                self._invert_index(documentId)
                return

    def _index_object(self, documentId, obj, threshold=None, attr=''):
        """Index an object 'obj' with integer id 'documentId'.

        Returns 1 when the stored value changed, 0 otherwise.  The
        ``threshold`` argument is not used here.
        """
        returnStatus = 0

        # First we need to see if there's anything interesting to look at
        datum = self._get_object_datum(obj, attr)

        # Make it boolean, int as an optimization
        if datum is not _marker:
            datum = int(bool(datum))

        # We don't want to do anything that we don't have to here, so we'll
        # check to see if the new and existing information is the same.
        oldDatum = self._unindex.get(documentId, _marker)
        if datum != oldDatum:
            if oldDatum is not _marker:
                self.removeForwardIndexEntry(oldDatum, documentId, check=False)
                if datum is _marker:
                    try:
                        del self._unindex[documentId]
                        self._length.change(-1)
                    except ConflictError:
                        raise
                    except Exception:
                        LOG.error(
                            'Should not happen: oldDatum was there, now '
                            'its not, for document with id %s', documentId)

            if datum is not _marker:
                self.insertForwardIndexEntry(datum, documentId)

            returnStatus = 1

        return returnStatus

    def unindex_object(self, documentId):
        """ Unindex the object with integer id 'documentId' and don't
        raise an exception if we fail
        """
        unindexRecord = self._unindex.get(documentId, _marker)
        if unindexRecord is _marker:
            return None

        self._increment_counter()

        self.removeForwardIndexEntry(unindexRecord, documentId)

        try:
            del self._unindex[documentId]
            self._length.change(-1)
        except ConflictError:
            raise
        except Exception:
            LOG.debug('Attempt to unindex nonexistent document'
                      ' with id %s',
                      documentId,
                      exc_info=True)

    def query_index(self, record, resultset=None):
        """Return the rids matching the first key of ``record.keys``.

        Only the first key is considered: every branch of the loop body
        returns.  An empty IISet is returned when there are no keys.
        """
        index = self._index
        indexed = self._index_value

        for key in record.keys:
            if bool(key) is bool(indexed):
                # If we match the indexed value, check index
                return intersection(index, resultset)
            else:
                # Otherwise, remove from resultset or _unindex
                if resultset is None:
                    return union(difference(self._unindex, index), IISet([]))
                else:
                    # Keep only rids that this index actually knows about.
                    return intersection(difference(resultset, index),
                                        self._unindex)
        return IISet()

    def indexSize(self):
        """Return distinct values, as an optimization we always claim 2."""
        return 2

    def items(self):
        """Return a list of (value, set of rids) tuples for both values."""
        indexed = self._index_value
        items = [(bool(indexed), self._index)]
        # The rids for the non-indexed value are collected from _unindex.
        false = IISet()
        for rid, value in self._unindex.iteritems():
            if value != indexed:
                false.add(rid)
        items.append((not bool(indexed), false))
        return items

    def uniqueValues(self, name=None, withLengths=0):
        """returns the unique values for name

        if withLengths is true, returns a sequence of
        tuples of (value, length)

        This is a generator; it yields nothing when ``name`` is given and
        differs from the id of this index.
        """
        if name is None:
            name = self.id
        elif name != self.id:
            return

        indexed = bool(self._index_value)
        unique_values = (indexed, not indexed)
        if not withLengths:
            for key in unique_values:
                yield key
        else:
            # The size of the forward index is the same for both values,
            # so it is computed once instead of once per value.
            ilen = len(self._index)
            for key in unique_values:
                if key is indexed:
                    yield (key, ilen)
                else:
                    ulen = len(self._unindex)
                    yield (key, ulen - ilen)
Пример #7
0
class BooleanIndex(UnIndex):
    """Index for booleans

       self._index = set([documentId1, documentId2])
       self._unindex = {documentId:[True/False]}

       False doesn't have actual entries in _index.

       self._length is adjusted together with _index (see
       insertForwardIndexEntry / removeForwardIndexEntry).
    """

    meta_type = "BooleanIndex"

    manage_options = (
        {
            'label': 'Settings',
            'action': 'manage_main'
        },
        {
            'label': 'Browse',
            'action': 'manage_browse'
        },
    )

    query_options = ["query"]

    manage = manage_main = DTMLFile('dtml/manageBooleanIndex', globals())
    manage_main._setName('manage_main')
    manage_browse = DTMLFile('../dtml/browseIndex', globals())

    def clear(self):
        # Replace the length counter, the forward set and the reverse
        # mapping with fresh, empty instances.
        self._length = BTrees.Length.Length()
        self._index = IITreeSet()
        self._unindex = IIBTree()

    def insertForwardIndexEntry(self, entry, documentId):
        """If True, insert directly into treeset

        A false ``entry`` is ignored: nothing is stored and the length
        counter is left alone.
        """
        if entry:
            self._index.insert(documentId)
            self._length.change(1)

    def removeForwardIndexEntry(self, entry, documentId):
        """Take the entry provided and remove any reference to documentId
        in its entry in the index.

        Only a true ``entry`` touches ``_index``.  ConflictError is
        re-raised; any other exception is logged and swallowed.
        """
        try:
            if entry:
                self._index.remove(documentId)
                self._length.change(-1)
        except ConflictError:
            raise
        except Exception:
            LOG.exception(
                '%s: unindex_object could not remove '
                'documentId %s from index %s. This '
                'should not happen.' %
                (self.__class__.__name__, str(documentId), str(self.id)))

    def _index_object(self, documentId, obj, threshold=None, attr=''):
        """ index an object 'obj' with integer id 'documentId'

        Returns 1 when the stored information changed, 0 otherwise.
        """
        returnStatus = 0

        # First we need to see if there's anything interesting to look at
        datum = self._get_object_datum(obj, attr)

        # Make it boolean, int as an optimization
        if datum is not _marker:
            datum = int(bool(datum))

        # We don't want to do anything that we don't have to here, so we'll
        # check to see if the new and existing information is the same.
        oldDatum = self._unindex.get(documentId, _marker)
        if datum != oldDatum:
            if oldDatum is not _marker:
                self.removeForwardIndexEntry(oldDatum, documentId)
                if datum is _marker:
                    # The object no longer provides a value: forget it.
                    try:
                        del self._unindex[documentId]
                    except ConflictError:
                        raise
                    except Exception:
                        LOG.error('Should not happen: oldDatum was there, now '
                                  'its not, for document with id %s' %
                                  documentId)

            if datum is not _marker:
                if datum:
                    self.insertForwardIndexEntry(datum, documentId)
                self._unindex[documentId] = datum

            returnStatus = 1

        return returnStatus

    def _apply_index(self, request, resultset=None):
        # Answer a catalog query for this index.
        #
        # Returns None when the parsed request carries no keys for this
        # index, otherwise a ``(result set, (self.id, ))`` tuple.  Only the
        # first query key is considered: every branch of the loop returns.
        record = parseIndexRequest(request, self.id, self.query_options)
        if record.keys is None:
            return None

        index = self._index
        used = (self.id, )

        for key in record.keys:
            if key:
                # If True, check index
                return (intersection(index, resultset), used)
            if resultset is None:
                # Every document known to _unindex that is not in the True
                # set.  NOTE(review): the union with an empty IISet
                # presumably converts the difference of a mapping into a
                # plain set -- confirm against the BTrees documentation.
                return (union(difference(self._unindex, index),
                              IISet([])), used)
            # Restrict the answer to documents this index knows about: a
            # bare difference(resultset, index) would also report documents
            # that were never indexed here as False.
            return (intersection(difference(resultset, index),
                                 self._unindex), used)
        return (IISet(), used)

    def indexSize(self):
        """Return distinct values, as an optimization we always claim 2."""
        return 2

    def items(self):
        # Return one (stored value, IISet((documentId, ))) pair for every
        # entry of _unindex.
        pairs = []
        for documentId, value in self._unindex.items():
            rids = documentId
            if isinstance(rids, int):
                rids = IISet((rids, ))
            pairs.append((value, rids))
        return pairs
Пример #8
0
class DateIndex(UnIndex, PropertyManager):
    """Index for dates.

    Dates are reduced to integers by ``_convert``; ``_unindex`` maps a
    document id to that integer.
    """
    implements(IDateIndex)

    meta_type = 'DateIndex'
    query_options = ('query', 'range')

    index_naive_time_as_local = True  # False means index as UTC
    _properties = ({'id': 'index_naive_time_as_local',
                    'type': 'boolean',
                    'mode': 'w'},)

    manage = manage_main = DTMLFile('dtml/manageDateIndex', globals())
    manage_browse = DTMLFile('../dtml/browseIndex', globals())

    manage_main._setName('manage_main')
    manage_options = ({'label': 'Settings', 'action': 'manage_main'},
                      {'label': 'Browse', 'action': 'manage_browse'},
                      ) + PropertyManager.manage_options

    def clear(self):
        """ Complete reset """
        self._index = IOBTree()
        self._unindex = IIBTree()
        self._length = Length()

    def index_object(self, documentId, obj, threshold=None):
        """index an object, normalizing the indexed value to an integer

           o Normalized value has granularity of one minute.

           o Objects which have 'None' as indexed value are *omitted*,
             by design.

           Returns 1 when the stored value changed, 0 otherwise.
        """
        returnStatus = 0

        try:
            date_attr = getattr(obj, self.id)
            if safe_callable(date_attr):
                date_attr = date_attr()

            ConvertedDate = self._convert(value=date_attr, default=_marker)
        except AttributeError:
            # No such attribute on the object: treat it as "no value".
            ConvertedDate = _marker

        oldConvertedDate = self._unindex.get(documentId, _marker)

        if ConvertedDate != oldConvertedDate:
            if oldConvertedDate is not _marker:
                self.removeForwardIndexEntry(oldConvertedDate, documentId)
                if ConvertedDate is _marker:
                    # The object no longer provides a date: forget it.
                    try:
                        del self._unindex[documentId]
                    except ConflictError:
                        raise
                    except Exception:
                        LOG.error("Should not happen: ConvertedDate was there,"
                                  " now it's not, for document with id %s" %
                                  documentId)

            if ConvertedDate is not _marker:
                self.insertForwardIndexEntry(ConvertedDate, documentId)
                self._unindex[documentId] = ConvertedDate

            returnStatus = 1

        return returnStatus

    def _apply_index(self, request, resultset=None):
        """Apply the index to query parameters given in the argument

        Normalize the 'query' arguments into integer values at minute
        precision before querying.

        Returns None when the parsed request carries no keys for this
        index, otherwise a ``(result, (self.id, ))`` tuple.  Raises
        RuntimeError for an operator not listed in ``self.operators``.
        """
        record = parseIndexRequest(request, self.id, self.query_options)
        if record.keys is None:
            return None

        # Materialize the converted keys: a range search passes them to
        # both min() and max(), which a lazy map() iterator would not
        # survive (the second call would see an exhausted iterator).
        keys = [self._convert(key) for key in record.keys]
        index = self._index
        r = None
        opr = None

        # experimental code for specifying the operator
        operator = record.get('operator', self.useOperator)
        if operator not in self.operators:
            raise RuntimeError("operator not valid: %s" % operator)

        # depending on the operator we use intersection or union
        if operator == "or":
            set_func = union
        else:
            set_func = intersection

        # range parameter
        range_arg = record.get('range', None)
        if range_arg:
            opr = "range"
            opr_args = []
            if range_arg.find("min") > -1:
                opr_args.append("min")
            if range_arg.find("max") > -1:
                opr_args.append("max")

        if record.get('usage', None):
            # see if any usage params are sent to field
            # (this overrides whatever the 'range' parameter set above)
            opr = record.usage.lower().split(':')
            opr, opr_args = opr[0], opr[1:]

        if opr == "range":  # range search
            if 'min' in opr_args:
                lo = min(keys)
            else:
                lo = None

            if 'max' in opr_args:
                hi = max(keys)
            else:
                hi = None

            if hi:
                setlist = index.values(lo, hi)
            else:
                setlist = index.values(lo)

            r = multiunion(setlist)

        else:  # not a range search
            for key in keys:
                docids = index.get(key, None)
                if docids is not None:
                    if isinstance(docids, int):
                        # A bare int entry is wrapped into a set.
                        docids = IISet((docids,))
                    else:
                        # set can't be bigger than resultset
                        docids = intersection(docids, resultset)
                    r = set_func(r, docids)

        if isinstance(r, int):
            r = IISet((r,))

        if r is None:
            return IISet(), (self.id, )
        return r, (self.id, )

    def _convert(self, value, default=None):
        """Convert Date/Time value to our internal representation

        Returns ``default`` for values of an unsupported type (including
        an empty string).  Raises OverflowError when the computed integer
        exceeds MAX32.
        """
        if isinstance(value, DateTime):
            t_tup = value.toZone('UTC').parts()
        elif isinstance(value, (float, int)):
            t_tup = time.gmtime(value)
        elif isinstance(value, str) and value:
            t_obj = DateTime(value).toZone('UTC')
            t_tup = t_obj.parts()
        elif isinstance(value, datetime):
            if self.index_naive_time_as_local and value.tzinfo is None:
                value = value.replace(tzinfo=Local)
            # else if tzinfo is None, naive time interpreted as UTC
            t_tup = value.utctimetuple()
        elif isinstance(value, date):
            t_tup = value.timetuple()
        else:
            return default

        yr = t_tup[0]
        mo = t_tup[1]
        dy = t_tup[2]
        hr = t_tup[3]
        mn = t_tup[4]

        # Pack year, month, day, hour and minute into a single integer.
        t_val = ((((yr * 12 + mo) * 31 + dy) * 24 + hr) * 60 + mn)

        if t_val > MAX32:
            # t_val must be integer fitting in the 32bit range
            raise OverflowError(
                "%s is not within the range of indexable dates (index: %s)"
                % (value, self.id))
        return t_val
Пример #9
0
class DateIndex(UnIndex, PropertyManager):
    """Date index.

    Dates are reduced to integers by ``_convert`` before being stored.
    """

    meta_type = 'DateIndex'
    query_options = ('query', 'range', 'not')

    # True: naive datetimes get the ``Local`` tzinfo attached before
    # conversion; False: they are interpreted as UTC.
    index_naive_time_as_local = True
    # Granularity of the stored value, in minutes.
    precision = 1
    _properties = (
        {'id': 'index_naive_time_as_local', 'type': 'boolean', 'mode': 'w'},
        {'id': 'precision', 'type': 'int', 'mode': 'w'},
    )

    manage = manage_main = DTMLFile('dtml/manageDateIndex', globals())
    manage_main._setName('manage_main')
    manage_browse = DTMLFile('../dtml/browseIndex', globals())

    manage_options = (
        {'label': 'Settings', 'action': 'manage_main'},
        {'label': 'Browse', 'action': 'manage_browse'},
    ) + PropertyManager.manage_options

    def clear(self):
        """Drop all indexed data and start over with empty structures.

        The change counter is created on first use and incremented on
        every later call.
        """
        self._index = IOBTree()
        self._unindex = IIBTree()
        self._length = Length()
        if self._counter is not None:
            self._increment_counter()
        else:
            self._counter = Length()

    def index_object(self, documentId, obj, threshold=None):
        """Index ``obj`` under ``documentId`` using its converted date.

           o The attribute named ``self.id`` is read from the object and
             called first when it is callable.

           o Values ``_convert`` cannot handle, and objects lacking the
             attribute, are left out of the index.

           o Returns 1 (after incrementing the change counter) when the
             stored value changed, 0 otherwise.
        """
        try:
            raw = getattr(obj, self.id)
            if safe_callable(raw):
                raw = raw()
            new_value = self._convert(value=raw, default=_marker)
        except AttributeError:
            new_value = _marker

        old_value = self._unindex.get(documentId, _marker)

        changed = new_value != old_value
        if not changed:
            return 0

        has_new = new_value is not _marker

        if old_value is not _marker:
            self.removeForwardIndexEntry(old_value, documentId)
            if not has_new:
                # Nothing replaces the old value: drop the reverse entry.
                try:
                    del self._unindex[documentId]
                except ConflictError:
                    raise
                except Exception:
                    LOG.error("Should not happen: ConvertedDate was there,"
                              " now it's not, for document with id %s" %
                              documentId)

        if has_new:
            self.insertForwardIndexEntry(new_value, documentId)
            self._unindex[documentId] = new_value

        self._increment_counter()
        return 1

    def _convert(self, value, default=None):
        """Turn a date-like ``value`` into the integer stored in the index.

        Accepts DateTime, float/int timestamps, non-empty strings,
        datetime and date objects; anything else yields ``default``.
        Raises OverflowError when the result exceeds MAX32.
        """
        if isinstance(value, DateTime):
            parts = value.toZone('UTC').parts()
        elif isinstance(value, (float, int)):
            parts = time.gmtime(value)
        elif isinstance(value, str) and value:
            parts = DateTime(value).toZone('UTC').parts()
        elif isinstance(value, datetime):
            # A naive datetime is interpreted as UTC unless the index is
            # configured to treat it as local time.
            if self.index_naive_time_as_local and value.tzinfo is None:
                value = value.replace(tzinfo=Local)
            parts = value.utctimetuple()
        elif isinstance(value, date):
            parts = value.timetuple()
        else:
            return default

        year, month, day, hour, minute = parts[:5]

        # Pack the five fields into one integer.
        months = year * 12 + month
        days = months * 31 + day
        hours = days * 24 + hour
        t_val = hours * 60 + minute

        # Round down to a multiple of the configured precision.
        if self.precision > 1:
            t_val -= t_val % self.precision

        t_val = int(t_val)

        # The value has to fit into the 32bit integer range.
        if t_val > MAX32:
            raise OverflowError(
                "%s is not within the range of indexable dates (index: %s)"
                % (value, self.id))
        return t_val