def setUp(self):
    super(TestModels, self).setUp()
    Stats.delete_all()
    connection.use_debug_cursor = True
    connection.cursor().execute("SET enable_seqscan = off")
    self.preserve_stats_wipe = settings.STATS_SIMPLE_WIPE
def test_stats(self):
    outdated = Stats.insert((id, point.dt, point.sum) for point in points)
    self.assertEqual(outdated, [])
    with assertQueries():
        point = Stats.latest(id)
    sample = id, point.dt, point.sum
    self.assertEqual(Stats.insert([sample]), [sample])
    self.assertEqual(point.timestamp % 10, 0)
    self.assertGreater(point, points[-2])
    self.assertLessEqual(point, points[-1])
    count = len(points) + 1
    for stat in Stats[:-1]:
        timestamps = map(
            operator.attrgetter('timestamp'),
            Stats.select(id, point.dt - (stat.expiration_time - timedelta(seconds=stat.step)), point.dt))
        self.assertLess(len(timestamps), count)
        count = len(timestamps)
        steps = set(y - x for x, y in zip(timestamps, timestamps[1:]))
        self.assertLessEqual(len(steps), 1)
    timestamps = map(
        operator.attrgetter('timestamp'),
        Stats.select(id, point.dt - timedelta(hours=1), point.dt, maxlen=100))
    self.assertLessEqual(len(timestamps), 100)
    self.assertFalse(any(timestamp % 60 for timestamp in timestamps))
    self.assertTrue(any(timestamp % 300 for timestamp in timestamps))
    for point in Stats.select(id, point.dt - timedelta(hours=1), point.dt, rate=True):
        self.assertEqual(point[1:], (1.0, 10))
    selection = list(
        Stats.select(id, now - timedelta(seconds=30), now + timedelta(seconds=30), fixed=3))
    self.assertEqual(len(selection), 3)
    # The result of this can vary depending on how settings is configured: if we have only 1 day of
    # 10-second samples we get a different answer than if we have more than a day, so cope with both
    # configurations. If Stats[0] starts at 'now' then it is using the 60-second roll-up and so
    # len = 6 (6 times 10); otherwise we get 3 (the 3 after 'now', because all dates are in the future).
    if Stats[0].start(id) < now:
        self.assertEqual(sum(point.len for point in selection), 3)
    else:
        # See HYD-3960 - this value does not always come out as 6 and so this assertion can fail. Because
        # there are 100 points this case is never exercised (see the comments above - look for HYD-3660),
        # but when it is run the value ends up as 4, 5 or 6. I (Chris) don't know why it varies, and when
        # I've looked for patterns I have not found any.
        self.assertEqual(sum(point.len for point in selection), 6)
    self.assertEqual(selection[0].len, 0)
    point, = Stats.select(id, now, now + timedelta(seconds=5), fixed=1)
    with assertQueries(*['DELETE'] * 5):
        Stats.delete(id)
    for model in Stats:
        self.assertListEqual(list(model.select(id)), [])
def _test_monster_data(self, simple_wipe, ids_to_create=500, job_stats_to_create=50, days=365 * 10):
    """
    Push 10 years worth of data through the stats system for 550 ids (50 of which are job stats).
    """
    date = start_time = datetime.now(utc)
    end_date = now + timedelta(days=days)
    settings.STATS_SIMPLE_WIPE = simple_wipe
    first_job_stat = ids_to_create + 1
    iterations_completed = 0

    with mock.patch("chroma_core.models.stats.datetime") as dt:
        while date < end_date:
            data = []

            for id in xrange(0, ids_to_create):
                data.append((id, date, id))

            for id in xrange(0, job_stats_to_create):
                data.append((first_job_stat + id, date, id))

            dt.now.return_value = date
            Stats.insert(data)

            date += timedelta(seconds=10)
            first_job_stat += 1
            iterations_completed += 1

    end_time = datetime.now(utc)

    print("Time to run test_monster_data %s, time per 10 second step %s, wipe=%s" %
          (end_time - start_time, (end_time - start_time) / iterations_completed, settings.STATS_SIMPLE_WIPE))

    # This test fails if we are not using SIMPLE_WIPE and we have job stats, so don't run
    # the record-count check in that case.
    if settings.STATS_SIMPLE_WIPE or job_stats_to_create == 0:
        for stat in Stats:
            actual_records = stat.objects.count()
            max_expected_records = (
                ids_to_create * total_seconds(stat.expiration_time + stat.flush_orphans_interval) / stat.step)
            self.assertLess(actual_records, max_expected_records)
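# A minimal sketch (assumption, not from the source): _test_monster_data above is intended to be
# driven by thin wrapper tests, one per wipe strategy. The method names and the reduced parameters
# below are illustrative choices to keep a unit-test run short; the real suite may use other values.
def test_monster_data_simple_wipe(self):
    self._test_monster_data(simple_wipe=True, ids_to_create=50, job_stats_to_create=5, days=1)

def test_monster_data_full_wipe(self):
    # job_stats_to_create=0 because the record-count check above is skipped when job stats
    # are combined with the non-simple wipe path.
    self._test_monster_data(simple_wipe=False, ids_to_create=50, job_stats_to_create=0, days=1)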
def fetch(self, fetch_metrics, begin, end, max_points=float("inf"), num_points=0):
    "Return datetimes with dicts of field names and values."
    result = collections.defaultdict(dict)
    types = set()
    begin = Stats[0].round(begin)  # exclude points from a partial sample
    end = Stats[0].round(end)  # exclude points from a partial sample
    for series in Series.filter(self.measured_object, name__in=fetch_metrics):
        types.add(series.type)
        minimum = 0.0 if series.type == "Counter" else float("-inf")
        for point in Stats.select(series.id, begin, end,
                                  rate=series.type in ("Counter", "Derive"),
                                  maxlen=max_points, fixed=num_points):
            result[point.dt][series.name] = max(minimum, point.mean)
    # if absolute and derived values are mixed, the earliest value will be incomplete
    if result and types > set(["Gauge"]) and len(result[min(result)]) < len(fetch_metrics):
        del result[min(result)]
    return dict(result)
def fetch_last(self, fetch_metrics):
    "Return latest datetime and dict of field names and values."
    latest, data = datetime.fromtimestamp(0, utc), {}
    for series in Series.filter(self.measured_object, name__in=fetch_metrics):
        point = Stats.latest(series.id)
        data[series.name] = point.mean
        latest = max(latest, point.dt)
    return latest, data
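# Hedged usage sketch (assumption, not from the source): a caller holding a MetricStore for a
# target might read both interfaces roughly like this; the metric names are illustrative only.
#
#     store = metrics.MetricStore.new(target)
#     end = datetime.now(utc)
#     # ten fixed buckets covering the last ten minutes
#     series = store.fetch(["stats_read_bytes", "stats_write_bytes"],
#                          end - timedelta(minutes=10), end, num_points=10)
#     latest, values = store.fetch_last(["stats_read_bytes", "stats_write_bytes"])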
def setUp(self):
    ChromaApiTestCase.setUp(self)
    fixture = collections.defaultdict(list)
    for line in open(os.path.join(os.path.dirname(__file__), "fixtures/stats.sjson")):
        data = json.loads(line.strip())
        fixture[data["type"], data["id"]].append((data["time"], data["data"]))
    # create gaps in data to test alignment
    for key in (min(fixture), max(fixture)):
        del fixture[key][-1]
    self.hosts = [synthetic_host("myserver{0:d}".format(n)) for n in range(2)]
    self.mgt, mounts = ManagedMgs.create_for_volume(synthetic_volume_full(self.hosts[0]).id, name="MGS")
    self.fs = ManagedFilesystem.objects.create(mgs=self.mgt, name="testfs")
    ObjectCache.add(ManagedFilesystem, self.fs)
    self.mdt, mounts = ManagedMdt.create_for_volume(synthetic_volume_full(self.hosts[0]).id, filesystem=self.fs)
    ObjectCache.add(ManagedTarget, self.mdt.managedtarget_ptr)
    for tm in mounts:
        ObjectCache.add(ManagedTargetMount, tm)
    self.osts = [
        ManagedOst.create_for_volume(synthetic_volume_full(self.hosts[1]).id, filesystem=self.fs)[0]
        for n in range(2)
    ]
    # store fixture data with corresponding targets
    for target, key in zip(self.hosts + [self.mdt] + self.osts, sorted(fixture)):
        store = metrics.MetricStore.new(target)
        kwargs = {"jobid_var": "procname_uid"} if isinstance(target, ManagedOst) else {}
        for timestamp, value in fixture[key]:
            Stats.insert(store.serialize(value, timestamp, **kwargs))
    for model in Stats:
        model.cache.clear()
def insert(self, samples):
    try:
        outdated = Stats.insert((id, dateparse.parse_datetime(dt), value) for id, dt, value in samples)
    except db.IntegrityError:
        log.error("Duplicate stats insert: " + db.connection.queries[-1]['sql'])
        db.transaction.rollback()  # allow future stats to still work
    except:
        log.error("Error handling stats insert: " + traceback.format_exc())
    else:
        if outdated:
            log.warn("Outdated samples ignored: {0}".format(outdated))
def fetch_jobs(self, metric, begin, end, job, max_points=float("inf"), num_points=0):
    "Return datetimes with dicts of field names and values."
    result = collections.defaultdict(dict)
    types = set()
    begin = Stats[0].round(begin)  # exclude points from a partial sample
    end = Stats[0].round(end)  # exclude points from a partial sample
    series_ids = Series.filter(self.measured_object, name__startswith="job_" + metric).values("id")
    series_ids = Stats[0].objects.filter(id__in=series_ids, dt__gte=begin).values("id").distinct("id")
    for series in Series.filter(self.measured_object, id__in=series_ids):
        types.add(series.type)
        for point in Stats.select(series.id, begin, end, rate=True, maxlen=max_points, fixed=num_points):
            result[point.dt][series.name.split("_", 3)[-1]] = point
    assert types.issubset(Series.JOB_TYPES)
    # translate job ids into metadata
    metadata = dict((job_id, job_id) for points in result.values() for job_id in points)
    if job != "id":
        for type in types:  # there should generally be only one
            metadata.update(scheduler.metadata(type, job, metadata))
    for dt in result:
        data = collections.defaultdict(lambda: Point.zero)
        for job_id, point in result[dt].items():
            data[metadata[job_id]] += point
        result[dt] = dict((key, max(0.0, data[key].mean)) for key in data)
    return dict(result)
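# Hedged usage sketch (assumption, not from the source): fetch_jobs rolls per-job rate series up
# by the requested key; job="id" keeps raw job ids, while any other value is resolved through
# scheduler.metadata(). The metric name "write_bytes" and the key "user" are illustrative only.
#
#     per_job = store.fetch_jobs("write_bytes", begin, end, job="id", num_points=10)
#     per_user = store.fetch_jobs("write_bytes", begin, end, job="user", num_points=10)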
def clear(self): "Remove all associated series." for series in Series.filter(self.measured_object): series.delete() Stats.delete(series.id)
def tearDown(self):
    connection.cursor().execute("SET enable_seqscan = on")
    connection.use_debug_cursor = False
    Stats.delete_all()
    settings.STATS_SIMPLE_WIPE = self.preserve_stats_wipe
def tearDown(self):
    Stats.delete_all()
    settings.STATS_SIMPLE_WIPE = self.preserve_stats_wipe
def setUp(self):
    Stats.delete_all()
    self.preserve_stats_wipe = settings.STATS_SIMPLE_WIPE