def digest(
    self, key: str, minimum_delay: Optional[int] = None, timestamp: Optional[float] = None
) -> Any:
    if minimum_delay is None:
        minimum_delay = self.minimum_delay

    if timestamp is None:
        timestamp = time.time()

    connection = self._get_connection(key)
    with self._get_timeline_lock(key, duration=30).acquire():
        try:
            response = script(
                connection,
                [key],
                [
                    "DIGEST_OPEN",
                    self.namespace,
                    self.ttl,
                    timestamp,
                    key,
                    self.capacity if self.capacity else -1,
                ],
            )
        except ResponseError as e:
            if "err(invalid_state):" in str(e):
                raise InvalidState("Timeline is not in the ready state.") from e
            else:
                raise

        # Materialize the records as a list: they are iterated twice, once to
        # build the yielded batch and once to collect the keys passed to
        # DIGEST_CLOSE. (A lazy `map` object would be exhausted by the first
        # pass, leaving DIGEST_CLOSE with no keys to clean up.)
        records = [
            Record(
                record_key.decode("utf-8"),
                self.codec.decode(value) if value is not None else None,
                float(record_timestamp),
            )
            for record_key, value, record_timestamp in response
        ]

        # If the record value is `None`, this means the record data was
        # missing (it was presumably evicted by Redis) so we don't need to
        # return it here.
        yield [record for record in records if record.value is not None]

        script(
            connection,
            [key],
            ["DIGEST_CLOSE", self.namespace, self.ttl, timestamp, key, minimum_delay]
            + [record.key for record in records],
        )
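
# A minimal sketch of how a caller might drive the generator above (not part
# of the original module; `deliver_digest`, `backend`, and `deliver` are
# hypothetical stand-ins). Iterating the generator receives the single
# yielded batch; resuming it afterwards is what executes DIGEST_CLOSE, so
# delivery happens while the timeline lock is still held.
def deliver_digest(backend, key, deliver):
    try:
        for records in backend.digest(key, minimum_delay=60):
            deliver(records)  # hypothetical delivery callback
    except InvalidState:
        # The timeline was not in the ready state; let the scheduler retry.
        pass
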
def digest(self, key, minimum_delay=None, timestamp=None):
    if minimum_delay is None:
        minimum_delay = self.minimum_delay

    if timestamp is None:
        timestamp = time.time()

    connection = self._get_connection(key)
    with self._get_timeline_lock(key, duration=30).acquire():
        try:
            response = script(connection, [key], [
                'DIGEST_OPEN',
                self.namespace,
                self.ttl,
                timestamp,
                key,
                self.capacity if self.capacity else -1,
            ])
        except ResponseError as e:
            if 'err(invalid_state):' in six.text_type(e):
                six.raise_from(
                    InvalidState('Timeline is not in the ready state.'),
                    e,
                )
            else:
                raise

        # Materialize the records eagerly: under Python 3, `map` returns a
        # lazy iterator that the yield below would exhaust, leaving the
        # DIGEST_CLOSE call with no record keys to clean up.
        records = list(map(
            lambda key__value__timestamp: Record(
                key__value__timestamp[0],
                self.codec.decode(key__value__timestamp[1])
                if key__value__timestamp[1] is not None
                else None,
                float(key__value__timestamp[2]),
            ),
            response,
        ))

        # If the record value is `None`, this means the record data was
        # missing (it was presumably evicted by Redis) so we don't need to
        # return it here.
        yield filter(
            lambda record: record.value is not None,
            records,
        )

        script(
            connection,
            [key],
            [
                'DIGEST_CLOSE',
                self.namespace,
                self.ttl,
                timestamp,
                key,
                minimum_delay,
            ] + [record.key for record in records],
        )
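
# All three versions construct Record(key, value, timestamp) and later read
# record.key and record.value, which is consistent with a simple namedtuple.
# A plausible shape, shown here as an assumption -- the real definition lives
# elsewhere in the package:
from collections import namedtuple

Record = namedtuple('Record', ('key', 'value', 'timestamp'))
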
def digest(self, key, minimum_delay=None):
    if minimum_delay is None:
        minimum_delay = self.minimum_delay

    timeline_key = make_timeline_key(self.namespace, key)
    digest_key = make_digest_key(timeline_key)

    connection = self.cluster.get_local_client_for_key(timeline_key)

    with Lock(timeline_key, nowait=True, timeout=30):
        # Check to ensure the timeline is in the correct state ("ready")
        # before sending. This acts as a throttling mechanism to prevent
        # sending a digest before its next scheduled delivery time in a
        # race condition scenario.
        if connection.zscore(make_schedule_key(self.namespace, SCHEDULE_STATE_READY), key) is None:
            raise InvalidState('Timeline is not in the ready state.')

        with connection.pipeline() as pipeline:
            pipeline.watch(digest_key)  # This shouldn't be necessary, but better safe than sorry?

            if pipeline.exists(digest_key):
                pipeline.multi()
                pipeline.zunionstore(digest_key, (timeline_key, digest_key), aggregate='max')
                pipeline.delete(timeline_key)
                pipeline.expire(digest_key, self.ttl)
                pipeline.execute()
            else:
                pipeline.multi()
                pipeline.rename(timeline_key, digest_key)
                pipeline.expire(digest_key, self.ttl)
                try:
                    pipeline.execute()
                except ResponseError as error:
                    if 'no such key' in str(error):
                        logger.debug('Could not move timeline for digestion (likely has no contents.)')
                    else:
                        raise

        # XXX: This must select all records, even though not all of them will
        # be returned if they exceed the capacity, to ensure that all records
        # will be garbage collected.
        records = connection.zrevrange(digest_key, 0, -1, withscores=True)
        if not records:
            logger.info('Retrieved timeline containing no records.')

        def get_records_for_digest():
            with connection.pipeline(transaction=False) as pipeline:
                for record_key, timestamp in records:
                    pipeline.get(make_record_key(timeline_key, record_key))

                for (record_key, timestamp), value in zip(records, pipeline.execute()):
                    # We have to handle failures if the key does not exist --
                    # this could happen due to evictions or race conditions
                    # where the record was added to a timeline while it was
                    # already being digested.
                    if value is None:
                        logger.warning('Could not retrieve event for timeline.')
                    else:
                        yield Record(record_key, self.codec.decode(value), timestamp)

        yield itertools.islice(get_records_for_digest(), self.capacity)

        def cleanup_records(pipeline):
            record_keys = [make_record_key(timeline_key, record_key) for record_key, score in records]
            pipeline.delete(digest_key, *record_keys)

        def reschedule():
            with connection.pipeline() as pipeline:
                pipeline.watch(digest_key)  # This shouldn't be necessary, but better safe than sorry?
                pipeline.multi()

                cleanup_records(pipeline)
                pipeline.zrem(make_schedule_key(self.namespace, SCHEDULE_STATE_READY), key)
                pipeline.zadd(make_schedule_key(self.namespace, SCHEDULE_STATE_WAITING), time.time() + minimum_delay, key)
                pipeline.setex(make_last_processed_timestamp_key(timeline_key), self.ttl, int(time.time()))
                pipeline.execute()

        def unschedule():
            with connection.pipeline() as pipeline:
                # Watch the timeline to ensure that no other transactions add
                # events to the timeline while we are trying to delete it.
                pipeline.watch(timeline_key)
                pipeline.multi()
                if connection.zcard(timeline_key) == 0:
                    cleanup_records(pipeline)
                    pipeline.delete(make_last_processed_timestamp_key(timeline_key))
                    pipeline.zrem(make_schedule_key(self.namespace, SCHEDULE_STATE_READY), key)
                    pipeline.zrem(make_schedule_key(self.namespace, SCHEDULE_STATE_WAITING), key)
                    pipeline.execute()

        # If there were records in the digest, we need to reschedule the
        # timeline so that any records added during digestion are also
        # delivered. If there were no items, we can try to remove the
        # timeline from the digestion schedule.
        if records:
            reschedule()
        else:
            try:
                unschedule()
            except WatchError:
                logger.debug('Could not remove timeline from schedule, rescheduling instead')
                reschedule()
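
# The oldest version relies on key-construction helpers and schedule-state
# constants defined elsewhere in the module. A plausible sketch of that Redis
# key layout, assuming colon-delimited namespacing and single-letter state
# tokens (the exact formats are not shown in this excerpt):
SCHEDULE_STATE_WAITING = 'w'
SCHEDULE_STATE_READY = 'r'


def make_schedule_key(namespace, state):
    # One sorted set per schedule state, scored by next delivery time.
    return '{0}:s:{1}'.format(namespace, state)


def make_timeline_key(namespace, key):
    # Sorted set of record keys for a single timeline, scored by timestamp.
    return '{0}:t:{1}'.format(namespace, key)


def make_digest_key(timeline_key):
    # Holding area the timeline is moved into while it is being digested.
    return '{0}:d'.format(timeline_key)


def make_record_key(timeline_key, record):
    # Plain string key storing the encoded payload for one record.
    return '{0}:r:{1}'.format(timeline_key, record)


def make_last_processed_timestamp_key(timeline_key):
    return '{0}:l'.format(timeline_key)
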