def test_descending(self):
  uuid_strs = list(get_str_uuids(10000))
  uuids = sorted(map(lambda s: TimeUUID(s), uuid_strs))
  descending_uuids = sorted(
    map(lambda s: TimeUUID(s, descending=True), uuid_strs))
  self.assertEqual(uuids, descending_uuids[::-1])
  for uu, duu in zip(uuids, descending_uuids[::-1]):
    self.assertTrue(uu == duu)
    self.assertTrue(uu <= duu)
    self.assertTrue(uu >= duu)
    self.assertFalse(uu != duu)
def retrieve(self, namespace, stream, start_time, end_time, start_id,
             configuration, order=ResultOrder.ASCENDING, limit=sys.maxint):
  """
  Retrieves all events for `stream` from `start_time` (inclusive) through
  `end_time` (inclusive). Instead of `start_time`, a `start_id` may be
  provided, in which case all events after `start_id` (exclusive) through
  `end_time` (inclusive) are returned. `start_id` should be used when the
  client was disconnected from the server before all events in the requested
  time window had been returned. `order` is one of ResultOrder.ASCENDING or
  ResultOrder.DESCENDING.

  Returns an iterator over all events, each serialized as a JSON string.
  """
  if not start_id:
    start_id = uuid_from_kronos_time(start_time, _type=UUIDType.LOWEST)
  else:
    start_id = TimeUUID(start_id)
  if uuid_to_kronos_time(start_id) > end_time:
    return []
  return self._retrieve(namespace, stream, start_id, end_time, order, limit,
                        configuration)
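# A minimal usage sketch for `retrieve` (hedged: the `backend` instance, the
# namespace/stream names, and the `configuration` values are illustrative
# only; `read_size` is the one configuration key `_retrieve` below consults):
#
#   events = backend.retrieve('demo_namespace', 'demo_stream',
#                             start_time=0, end_time=some_end_time,
#                             start_id=None,
#                             configuration={'read_size': 500})
#   for event_json in events:  # JSON strings, ascending order by default
#     event = json.loads(event_json)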
def test_rollover(self):
  settings.storage.elasticsearch.rollover_size = 10
  settings.storage.elasticsearch.rollover_check_period_seconds = 2
  reload_router()
  indices = set()
  for i in xrange(50):
    indices.add(router.get_backend('elasticsearch')
                .index_manager.get_index('kronos'))
    self.put('test_rollover', [{TIMESTAMP_FIELD: 0} for _ in xrange(5)])
    gevent.sleep(0.05)

  # Has index rolled over?
  self.assertTrue(len(indices) > 1)

  # No events were lost?
  events = self.get('test_rollover', 0, 1)
  self.assertEqual(len(events), 50 * 5)
  self.assertEqual(events,
                   sorted(events, key=lambda e: TimeUUID(e[ID_FIELD])))

  es = router.get_backend('elasticsearch').es
  index = router.get_backend('elasticsearch').index_manager.get_index(
    'kronos')
  indices.discard(index)
  for index in indices:
    self.assertTrue(es.count(index=index)['count'] >= 10)
def delete(self, namespace, stream, start_time, end_time, start_id,
           configuration):
  if not start_id:
    # `_delete` treats `start_id` as exclusive, so anchor just below
    # `start_time` to make events stamped exactly `start_time` deletable.
    start_id = uuid_from_kronos_time(start_time - 1, _type=UUIDType.HIGHEST)
  else:
    start_id = TimeUUID(start_id)
  if uuid_to_kronos_time(start_id) > end_time:
    return 0
  return self._delete(namespace, stream, start_id, end_time, configuration)
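# A sketch of the boundary-UUID trick used above (hedged; it relies only on
# the LOWEST/HIGHEST UUID types and the time-first TimeUUID ordering that the
# tests in this section exercise): the HIGHEST UUID at `t - 1` sorts strictly
# below the LOWEST UUID at `t`, so an exclusive scan starting after it still
# covers every event with timestamp >= t.
#
#   lo = uuid_from_kronos_time(t - 1, _type=UUIDType.HIGHEST)
#   hi = uuid_from_kronos_time(t, _type=UUIDType.LOWEST)
#   assert lo < hi  # everything stamped at or after `t` sorts above `lo`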
def test_cmp(self):
  uuids = map(lambda s: TimeUUID(s), get_str_uuids(10000))
  random.shuffle(uuids)
  for _ in xrange(10000):
    a, b = random.choice(uuids), random.choice(uuids)
    self.assertEqual(cmp(a, b), py_cmp(a, b))
  for _ in xrange(1000):
    i = random.randint(1, 9999)
    self.assertTrue(uuids[i] == uuids[i])
    self.assertTrue(uuids[i] >= uuids[i])
    self.assertTrue(uuids[i] <= uuids[i])
    self.assertFalse(uuids[i] != uuids[i])
    self.assertFalse(uuids[i - 1] == uuids[i])
    self.assertTrue(uuids[i - 1] != uuids[i])
def test_bytes_init(self):
  for uuid_str in get_str_uuids(10000):
    uu = UUID(uuid_str)
    tuu1 = TimeUUID(str(uu))
    tuu2 = TimeUUID(bytes=uu.bytes)
    self.assertEqual(tuu1, tuu2)
def test_str(self):
  for uuid_str in get_str_uuids(20000):
    tuu = TimeUUID(uuid_str)
    self.assertEqual(uuid_str, str(tuu))
    self.assertEqual('TimeUUID(%s)' % uuid_str, repr(tuu))
def test_bytes(self):
  for _id in get_str_uuids(10000):
    uu = UUID(_id)
    tuu = TimeUUID(_id)
    self.assertEqual(uu.bytes, tuu.bytes)
def test_time(self):
  for uuid_str in get_str_uuids(10000):
    uu = UUID(uuid_str)
    tuu = TimeUUID(uuid_str)
    self.assertEqual(uu.time, tuu.time)
def _retrieve(self, namespace, stream, start_id, end_time, order, limit,
              configuration):
  """
  Yield events from `stream`, starting after the event with id `start_id`,
  up to and including events with timestamp `end_time`.
  """
  indices = self.index_manager.get_aliases(namespace,
                                           uuid_to_kronos_time(start_id),
                                           end_time)
  if not indices:
    return
  end_id = uuid_from_kronos_time(end_time, _type=UUIDType.HIGHEST)
  end_id.descending = start_id.descending = descending = (
    order == ResultOrder.DESCENDING)
  start_time = uuid_to_kronos_time(start_id)
  body_query = {
    'query': {
      'filtered': {
        'query': {'match_all': {}},
        'filter': {
          'range': {TIMESTAMP_FIELD: {'gte': start_time, 'lte': end_time}}
        }
      }
    }
  }
  sort_dir = 'desc' if descending else 'asc'
  sort_query = [
    '%s:%s' % (TIMESTAMP_FIELD, sort_dir),
    '%s:%s' % (ID_FIELD, sort_dir)
  ]

  # `last_id` starts at the exclusive boundary (`start_id` when ascending,
  # `end_id` when descending) and advances with each yielded event.
  last_id = end_id if descending else start_id
  scroll_id = None
  while True:
    size = max(min(limit, configuration['read_size']) / self.shards, 10)
    if scroll_id is None:
      res = self.es.search(index=indices,
                           doc_type=stream,
                           size=size,
                           body=body_query,
                           sort=sort_query,
                           _source=True,
                           scroll='1m',
                           ignore=[400, 404],
                           allow_no_indices=True,
                           ignore_unavailable=True)
    else:
      res = self.es.scroll(scroll_id, scroll='1m')
    if '_scroll_id' not in res:
      break
    scroll_id = res['_scroll_id']
    hits = res.get('hits', {}).get('hits')
    if not hits:
      break
    for hit in hits:
      _id = TimeUUID(hit['_id'], descending=descending)
      # The range filter above is timestamp-granular, so skip hits at the
      # boundary timestamp that fall at or before the exclusive boundary id.
      if _id <= last_id:
        continue
      last_id = _id
      event = hit['_source']
      del event[LOGSTASH_TIMESTAMP_FIELD]
      yield json.dumps(event)
      limit -= 1
      if limit == 0:
        break
    if limit == 0:
      # Without this outer guard the scroll would keep fetching, and the
      # limit check above (which only runs after a yield) would never fire
      # again, yielding events past `limit`.
      break

  if scroll_id is not None:
    self.es.clear_scroll(scroll_id)
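# The loop above is the standard elasticsearch-py scroll pattern; a
# stripped-down sketch of just the pagination (hedged: hypothetical index
# name, assumes a reachable cluster and an elasticsearch-py client `es`, and
# uses only the search/scroll/clear_scroll calls seen in `_retrieve`):
#
#   res = es.search(index='kronos_demo', body={'query': {'match_all': {}}},
#                   size=100, scroll='1m')
#   scroll_id = res.get('_scroll_id')
#   while scroll_id and res['hits']['hits']:
#     for hit in res['hits']['hits']:
#       handle(hit['_source'])  # `handle` is a placeholder
#     res = es.scroll(scroll_id, scroll='1m')
#     scroll_id = res.get('_scroll_id')
#   if scroll_id:
#     es.clear_scroll(scroll_id)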
def deserialize(byts, protocol_version):
  return TimeUUID(bytes=byts)
def test_get(self):
  stream = 'TestKronosAPIs_test_get'
  event1 = [{'a': 1, TIMESTAMP_FIELD: 1}]
  event2 = [{'a': 2, TIMESTAMP_FIELD: 2}]
  event3 = [{'a': 3, TIMESTAMP_FIELD: 3}]
  event4 = [{'a': 4, TIMESTAMP_FIELD: 3}]

  # Test get from non-existent streams.
  events = self.get(stream, 0, 4)
  self.assertEqual(len(events), 0)

  # Test get with intervals that have and don't have events.
  self.put(stream, event1)
  events = self.get(stream, 0, 4)
  self.assertEqual(len(events), 1)
  events = self.get(stream, 2, 4)
  self.assertEqual(len(events), 0)

  # Test get with different time slices.
  self.put(stream, event2)
  events = self.get(stream, 0, 4)
  self.assertEqual(len(events), 2)
  self.assertEqual(events,
                   sorted(events, key=lambda e: TimeUUID(e[ID_FIELD])))
  events = self.get(stream, 2, 4)
  self.assertEqual(len(events), 1)
  event2_id = events[0][ID_FIELD]

  self.put(stream, event3)
  events = self.get(stream, 0, 4)
  self.assertEqual(len(events), 3)
  self.assertEqual(events,
                   sorted(events, key=lambda e: TimeUUID(e[ID_FIELD])))
  events = self.get(stream, 2, 4)
  self.assertEqual(len(events), 2)
  self.assertEqual(events,
                   sorted(events, key=lambda e: TimeUUID(e[ID_FIELD])))

  # Test get for overlapping time events.
  self.put(stream, event4)
  events = self.get(stream, 0, 4)
  self.assertEqual(len(events), 4)
  self.assertEqual(events,
                   sorted(events, key=lambda e: TimeUUID(e[ID_FIELD])))
  events = self.get(stream, 2, 4)
  self.assertEqual(len(events), 3)
  self.assertEqual(events,
                   sorted(events, key=lambda e: TimeUUID(e[ID_FIELD])))
  events = self.get(stream, 3, 4)
  self.assertEqual(len(events), 2)
  self.assertEqual(events,
                   sorted(events, key=lambda e: TimeUUID(e[ID_FIELD])))

  # Test get for `start_time` and `end_time` inclusivity.
  events = self.get(stream, 1, 3)
  self.assertEqual(len(events), 4)
  self.assertEqual(events,
                   sorted(events, key=lambda e: TimeUUID(e[ID_FIELD])))

  # Test get with `start_id`.
  events = self.get(stream, None, 4, start_id=event2_id)
  self.assertEqual(len(events), 2)
  self.assertEqual(events,
                   sorted(events, key=lambda e: TimeUUID(e[ID_FIELD])))
  for event in events:
    self.assertEqual(event[TIMESTAMP_FIELD], 3)

  # Test get with `limit`.
  events = self.get(stream, 0, 4, limit=2)
  self.assertEqual(len(events), 2)
  self.assertEqual(events,
                   sorted(events, key=lambda e: TimeUUID(e[ID_FIELD])))
  self.assertEqual(events[0][TIMESTAMP_FIELD], 1)
  self.assertEqual(events[1][TIMESTAMP_FIELD], 2)
  events = self.get(stream, 0, 4, limit=3)
  self.assertEqual(len(events), 3)
  self.assertEqual(events,
                   sorted(events, key=lambda e: TimeUUID(e[ID_FIELD])))
  self.assertEqual(events[0][TIMESTAMP_FIELD], 1)
  self.assertEqual(events[1][TIMESTAMP_FIELD], 2)
  self.assertEqual(events[2][TIMESTAMP_FIELD], 3)
  events = self.get(stream, 0, 4, limit=0)
  self.assertEqual(len(events), 0)

  # Test get with `order`.
  events = self.get(stream, 0, 4, order=ResultOrder.ASCENDING)
  self.assertEqual(len(events), 4)
  self.assertEqual(events,
                   sorted(events, key=lambda e: TimeUUID(e[ID_FIELD])))
  events = self.get(stream, 0, 4, order=ResultOrder.DESCENDING)
  self.assertEqual(len(events), 4)
  self.assertEqual(
    events,
    sorted(events, key=lambda e: TimeUUID(e[ID_FIELD], descending=True)))

  # Test get with weird time ranges.
  # `start_time` == `end_time`
  self.assertEqual(len(self.get(stream, 3, 3)), 2)
  self.assertEqual(len(self.get(stream, 4, 4)), 0)
  # `start_time` and `end_time` in the future.
  now = epoch_time_to_kronos_time(time.time())
  self.assertEqual(
    len(self.get(stream,
                 now + epoch_time_to_kronos_time(1000),
                 now + epoch_time_to_kronos_time(2000))),
    0)
  # `start_time` > `end_time`
  self.assertEqual(len(self.get(stream, 10, 5)), 0)
  # `start_time` < 0 and `end_time` < 0
  self.assertEqual(len(self.get(stream, -2000, -1000)), 0)
def test_aliasing(self):
  settings.storage.elasticsearch.rollover_size = 10
  reload_router(kill_update_thread=True)
  index1 = router.get_backend('elasticsearch').index_manager.get_index(
    'kronos')
  time1 = datetime_to_kronos_time(datetime(2014, 1, 1, 0))
  time2 = datetime_to_kronos_time(datetime(2014, 1, 2, 0))
  time3 = datetime_to_kronos_time(datetime(2014, 1, 3, 0))
  self.put('test_aliasing', [{TIMESTAMP_FIELD: time1, 'i': i, 'j': 0}
                             for i in xrange(5)])
  self.put('test_aliasing', [{TIMESTAMP_FIELD: time2, 'i': i, 'j': 0}
                             for i in xrange(5)])
  router.get_backend('elasticsearch').index_manager.update()
  index2 = router.get_backend('elasticsearch').index_manager.get_index(
    'kronos')
  self.put('test_aliasing', [{TIMESTAMP_FIELD: time2, 'i': i, 'j': 1}
                             for i in xrange(5)])
  self.put('test_aliasing', [{TIMESTAMP_FIELD: time3, 'i': i, 'j': 1}
                             for i in xrange(5)])
  router.get_backend('elasticsearch').index_manager.update()
  index3 = router.get_backend('elasticsearch').index_manager.get_index(
    'kronos')

  # Has index rolled over each time?
  self.assertTrue(index1 != index2)
  self.assertTrue(index2 != index3)
  self.assertTrue(index1 != index3)

  events = self.get('test_aliasing', time1, time3)
  self.assertEqual(len(events), 20)
  self.assertEqual(events,
                   sorted(events, key=lambda e: TimeUUID(e[ID_FIELD])))
  events = self.get('test_aliasing', time1, time2)
  self.assertEqual(len(events), 15)
  self.assertEqual(events,
                   sorted(events, key=lambda e: TimeUUID(e[ID_FIELD])))
  events = self.get('test_aliasing', time2, time3)
  self.assertEqual(len(events), 15)
  self.assertEqual(events,
                   sorted(events, key=lambda e: TimeUUID(e[ID_FIELD])))

  es = router.get_backend('elasticsearch').es
  aliases = es.indices.get_aliases(index=[index1, index2, index3])
  self.assertEqual(len(aliases), 2)
  self.assertTrue(index1 in aliases)
  self.assertTrue(index2 in aliases)
  self.assertEqual(set(aliases[index1]['aliases']) &
                   set(aliases[index2]['aliases']),
                   set(['kronos_test:kronos:2014.01.02']))
  self.assertEqual(set(aliases[index1]['aliases']),
                   set(['kronos_test:kronos:2014.01.01',
                        'kronos_test:kronos:2014.01.02']))
  self.assertEqual(set(aliases[index2]['aliases']),
                   set(['kronos_test:kronos:2014.01.02',
                        'kronos_test:kronos:2014.01.03']))
  self.assertEqual(es.count(index=index1)['count'], 10)
  self.assertEqual(es.count(index=index2)['count'], 10)
  self.assertEqual(es.count(index=index3,
                            ignore_unavailable=True).get('count', 0), 0)