def is_matched_user(user_id, spec):
    """
    Return whether ``user_id`` satisfies every check listed in ``spec``.

    A check counts as satisfied when ``matches_check`` is true for at least
    one of the grouped time-interval summaries retrieved for it.

    Keys read from ``spec``:
      - "checks", "time_type" and "freq" (always)
      - "from_local_date" and "to_local_date" -> local date query
      - otherwise "start_time" and "end_time" -> timestamp query
      - otherwise a timestamp query from 0 until ``time.time()``; in that
        case "time_type" must be "timestamp"

    :param user_id: the user whose metrics are summarized
    :param spec: dict describing the checks and the time range (see above)
    :return: ``np.all`` over the per-check match flags
    :raises AssertionError: if no range is given and time_type is not
        "timestamp", or if the summary is None or lacks 'user_metrics'
    :raises KeyError: if "checks", "time_type" or "freq" is missing
    """
    metric_list = get_metric_list(spec["checks"])
    time_type = spec['time_type']
    if 'from_local_date' in spec and 'to_local_date' in spec:
        freq_metrics = enam.summarize_by_local_date(user_id,
            spec["from_local_date"], spec["to_local_date"],
            spec["freq"], metric_list, include_aggregate=False)
    elif 'start_time' in spec and 'end_time' in spec:
        freq_metrics = enam.summarize_by_timestamp(user_id,
            spec["start_time"], spec["end_time"],
            spec["freq"], metric_list, include_aggregate=False)
    else:
        # If no start and end times are specified, we assume that this is a
        # timestamp query because we can come up with a reasonable start and end
        # time for timestamps but not for local_dates, which are basically a filter.
        # so if we run this on the first of a month, for example, we won't find
        # anything, which seems bogus and not what people would expect
        assert time_type == "timestamp", "time_type = %s, expected timestamp" % time_type
        freq_metrics = enam.summarize_by_timestamp(user_id,
            0, time.time(), spec["freq"], metric_list, include_aggregate=False)

    assert (freq_metrics is not None)
    assert ('user_metrics' in freq_metrics)
    curr_user_metrics = freq_metrics['user_metrics']
    checks = spec['checks']
    check_results = np.zeros(len(checks))
    for i, check in enumerate(checks):
        # The i-th entry of user_metrics is used for the i-th check
        # (presumably get_metric_list preserves the order of the checks -
        # TODO confirm).
        # curr_metric_result is a list of ModeStatTimeSummary objects, one for each
        # grouped time interval in the range
        # e.g. for daily, 2017-01-19, 2017-01-20, 2017-01-21, 2017-01-22, 2017-01-23, ....
        curr_metric_result = curr_user_metrics[i]
        # We defined our check as being true if it is true for _any_ grouped time
        # period in the range. So as long as we find a match for that check, we are
        # good! any() stops at the first matching interval instead of evaluating
        # the remaining ones for no benefit.
        if any(matches_check(check, msts) for msts in curr_metric_result):
            check_results[i] = True

    # Computed once and reused for both the log line and the return value
    all_matched = np.all(check_results)
    logging.info("For user_id %s, check result array = %s, all? %s",
                 user_id, check_results, all_matched)
    return all_matched
def testCountTimestampMetrics(self):
    # Daily ('d') 'count' summary for the test user between aug_start_ts and
    # aug_end_ts; the trailing True is presumably include_aggregate - confirm
    # against summarize_by_timestamp.
    summary = metrics.summarize_by_timestamp(
        self.testUUID, self.aug_start_ts, self.aug_end_ts,
        'd', ['count'], True)
    logging.debug(summary)

    self.assertEqual(list(summary.keys()),
                     ['aggregate_metrics', 'user_metrics'])
    # One result list per requested metric; 'count' is the only one
    per_user = summary['user_metrics'][0]
    aggregate = summary['aggregate_metrics'][0]

    # Per-user results: two daily entries, on the 27th and the 28th
    self.assertEqual(len(per_user), 2)
    self.assertEqual([entry.nUsers for entry in per_user], [1, 1])
    first_day = per_user[0]
    self.assertEqual(first_day.local_dt.day, 27)
    self.assertEqual(per_user[1].local_dt.day, 28)
    self.assertEqual(first_day.ON_FOOT, 4)
    self.assertEqual(first_day.BICYCLING, 2)
    # Changed from 3 to 4 - investigation at
    # https://github.com/e-mission/e-mission-server/issues/288#issuecomment-242531798
    self.assertEqual(first_day.IN_VEHICLE, 4)

    # We are not going to make absolute value assertions about
    # the aggregate values since they are affected by other
    # entries in the database. However, because we have at least
    # data for two days in the database, the aggregate data
    # must be at least that much larger than the original data.
    self.assertEqual(len(aggregate), 8)
    # no overlap between users at the daily level
    # bunch of intermediate entries with no users since this binning works
    # by range
    self.assertEqual([entry.nUsers for entry in aggregate],
                     [1, 1, 0, 0, 0, 0, 1, 1])
    # If there are no users, there are no values for any of the fields
    # since these are never negative, it implies that their sum is zero
    empty_bin = aggregate[2]
    self.assertTrue(all(mode not in empty_bin
                        for mode in ('ON_FOOT', 'BICYCLING', 'IN_VEHICLE')))
def testCountTimestampMetrics(self):
    # Daily ('d') 'count' summary for the test user between aug_start_ts and
    # aug_end_ts.
    # NOTE(review): the metric is passed as a bare string and the results are
    # not indexed per metric - confirm this still matches the current
    # summarize_by_timestamp signature and return shape.
    met_result = metrics.summarize_by_timestamp(self.testUUID,
        self.aug_start_ts, self.aug_end_ts, 'd', 'count')
    logging.debug(met_result)

    # dict.keys() is a view on python 3 and never compares equal to a list,
    # and on python 2 the key order is arbitrary. Comparing the sorted keys
    # checks for exactly these two keys on both versions.
    self.assertEqual(sorted(met_result.keys()),
                     ['aggregate_metrics', 'user_metrics'])
    user_met_result = met_result['user_metrics']
    agg_met_result = met_result['aggregate_metrics']

    self.assertEqual(len(user_met_result), 2)
    self.assertEqual([m.nUsers for m in user_met_result], [1, 1])
    self.assertEqual(user_met_result[0].local_dt.day, 27)
    self.assertEqual(user_met_result[1].local_dt.day, 28)
    self.assertEqual(user_met_result[0].ON_FOOT, 4)
    self.assertEqual(user_met_result[0].BICYCLING, 2)
    self.assertEqual(user_met_result[0].IN_VEHICLE, 3)

    # We are not going to make absolute value assertions about
    # the aggregate values since they are affected by other
    # entries in the database. However, because we have at least
    # data for two days in the database, the aggregate data
    # must be at least that much larger than the original data.
    self.assertEqual(len(agg_met_result), 8)
    # no overlap between users at the daily level
    # bunch of intermediate entries with no users since this binning works
    # by range
    self.assertEqual([m.nUsers for m in agg_met_result],
                     [1, 1, 0, 0, 0, 0, 1, 1])
    # If there are no users, there are no values for any of the fields
    # since these are never negative, it implies that their sum is zero.
    # One assertion per mode so that a failure names the offending field.
    for mode in ('ON_FOOT', 'BICYCLING', 'IN_VEHICLE'):
        self.assertNotIn(mode, agg_met_result[2])
def is_matched_user(user_id, spec):
    """
    Decide whether ``user_id`` passes all of the checks in ``spec``.

    Each check passes if ``matches_check`` holds for at least one grouped
    time-interval summary retrieved for that check.

    The query is selected from the keys present in ``spec``:
      - "from_local_date" and "to_local_date": summarize by local date
      - else "start_time" and "end_time": summarize by timestamp
      - else: summarize by timestamp from 0 to ``time.time()``, which
        requires ``spec["time_type"] == "timestamp"``
    "checks", "time_type" and "freq" are read in every case.

    :param user_id: the user whose metrics are summarized
    :param spec: dict with the checks and the time range described above
    :return: the result of ``np.all`` on the per-check match flags
    :raises AssertionError: if there is no range and time_type is not
        "timestamp", or the summary is None or has no 'user_metrics'
    :raises KeyError: if "checks", "time_type" or "freq" is missing
    """
    metric_list = get_metric_list(spec["checks"])
    time_type = spec['time_type']
    if 'from_local_date' in spec and 'to_local_date' in spec:
        freq_metrics = enam.summarize_by_local_date(user_id,
            spec["from_local_date"], spec["to_local_date"],
            spec["freq"], metric_list, include_aggregate=False)
    elif 'start_time' in spec and 'end_time' in spec:
        freq_metrics = enam.summarize_by_timestamp(user_id,
            spec["start_time"], spec["end_time"],
            spec["freq"], metric_list, include_aggregate=False)
    else:
        # If no start and end times are specified, we assume that this is a
        # timestamp query because we can come up with a reasonable start and end
        # time for timestamps but not for local_dates, which are basically a filter.
        # so if we run this on the first of a month, for example, we won't find
        # anything, which seems bogus and not what people would expect
        assert time_type == "timestamp", "time_type = %s, expected timestamp" % time_type
        freq_metrics = enam.summarize_by_timestamp(user_id,
            0, time.time(), spec["freq"], metric_list, include_aggregate=False)

    assert(freq_metrics is not None)
    assert('user_metrics' in freq_metrics)
    curr_user_metrics = freq_metrics['user_metrics']
    checks = spec['checks']
    check_results = np.zeros(len(checks))
    for i, check in enumerate(checks):
        # Results are looked up by the position of the check (this assumes
        # get_metric_list keeps the checks in order - TODO confirm).
        # curr_metric_result is a list of ModeStatTimeSummary objects, one for each
        # grouped time interval in the range
        # e.g. for daily, 2017-01-19, 2017-01-20, 2017-01-21, 2017-01-22, 2017-01-23, ....
        curr_metric_result = curr_user_metrics[i]
        # We defined our check as being true if it is true for _any_ grouped time
        # period in the range, so stop scanning the intervals as soon as one of
        # them matches rather than evaluating all the remaining ones.
        if any(matches_check(check, msts) for msts in curr_metric_result):
            check_results[i] = True

    # Evaluate the overall verdict once; it is both logged and returned
    all_matched = np.all(check_results)
    logging.info("For user_id %s, check result array = %s, all? %s",
                 user_id, check_results, all_matched)
    return all_matched
def testCountNoEntries(self):
    # Ensure that we don't crash if we don't find any entries
    # Should return empty array instead
    # Unlike in https://amplab.cs.berkeley.edu/jenkins/job/e-mission-server-prb/591/

    def assert_no_entries(summary):
        # Both sections must be present, each with an empty list as the
        # result for the single requested metric ('count')
        self.assertEqual(list(summary.keys()),
                         ['aggregate_metrics', 'user_metrics'])
        self.assertEqual(summary['aggregate_metrics'][0], [])
        self.assertEqual(summary['user_metrics'][0], [])

    # Local date query over the years 2000 to 2001, grouped monthly
    assert_no_entries(metrics.summarize_by_local_date(
        self.testUUID,
        ecwl.LocalDate({'year': 2000}), ecwl.LocalDate({'year': 2001}),
        'MONTHLY', ['count'], True))

    # Same range as a timestamp query, grouped with the 'm' frequency
    # NOTE(review): `.timestamp` is read as an attribute here; newer arrow
    # releases expose it as a method - confirm the pinned arrow version.
    assert_no_entries(metrics.summarize_by_timestamp(
        self.testUUID,
        arrow.get(2000,1,1).timestamp, arrow.get(2001,1,1).timestamp,
        'm', ['count'], True))
def testCountNoEntries(self):
    # Ensure that we don't crash if we don't find any entries
    # Should return empty array instead
    # Unlike in https://amplab.cs.berkeley.edu/jenkins/job/e-mission-server-prb/591/
    expected_keys = ['aggregate_metrics', 'user_metrics']

    # Local date query over the years 2000 to 2001, grouped monthly
    met_result_ld = metrics.summarize_by_local_date(self.testUUID,
        ecwl.LocalDate({'year': 2000}), ecwl.LocalDate({'year': 2001}),
        'MONTHLY', ['count'], True)
    # dict.keys() is a view on python 3 and never compares equal to a list,
    # and on python 2 the key order is arbitrary. Comparing the sorted keys
    # checks for exactly these two keys on both versions.
    self.assertEqual(sorted(met_result_ld.keys()), expected_keys)
    self.assertEqual(met_result_ld['aggregate_metrics'][0], [])
    self.assertEqual(met_result_ld['user_metrics'][0], [])

    # Same range as a timestamp query, grouped with the 'm' frequency
    # NOTE(review): `.timestamp` is read as an attribute here; newer arrow
    # releases expose it as a method - confirm the pinned arrow version.
    met_result_ts = metrics.summarize_by_timestamp(self.testUUID,
        arrow.get(2000,1,1).timestamp, arrow.get(2001,1,1).timestamp,
        'm', ['count'], True)
    self.assertEqual(sorted(met_result_ts.keys()), expected_keys)
    self.assertEqual(met_result_ts['aggregate_metrics'][0], [])
    self.assertEqual(met_result_ts['user_metrics'][0], [])
def testCountTimestampMetrics(self):
    # Daily ('d') 'count' summary for the test user between aug_start_ts and
    # aug_end_ts; the trailing True is presumably include_aggregate - confirm
    # against summarize_by_timestamp.
    summary = metrics.summarize_by_timestamp(
        self.testUUID, self.aug_start_ts, self.aug_end_ts,
        'd', ['count'], True)
    import json
    import bson.json_util as bju
    # Serialized with the bson handler before logging
    serialized = json.dumps(summary, default=bju.default)
    logging.debug(serialized)

    self.assertEqual(list(summary.keys()),
                     ['aggregate_metrics', 'user_metrics'])
    # One result list per requested metric; 'count' is the only one
    per_user = summary['user_metrics'][0]
    aggregate = summary['aggregate_metrics'][0]

    # Per-user results: two daily entries, on the 27th and the 28th
    self.assertEqual(len(per_user), 2)
    self.assertEqual([entry.nUsers for entry in per_user], [1, 1])
    first_day = per_user[0]
    self.assertEqual(first_day.local_dt.day, 27)
    self.assertEqual(per_user[1].local_dt.day, 28)
    self.assertEqual(first_day.WALKING, 7)
    self.assertNotIn("BICYCLING", first_day)
    # Changed from 3 to 4 - investigation at
    # https://github.com/e-mission/e-mission-server/issues/288#issuecomment-242531798
    self.assertEqual(first_day.BUS, 4)

    # We are not going to make absolute value assertions about
    # the aggregate values since they are affected by other
    # entries in the database. However, because we have at least
    # data for two days in the database, the aggregate data
    # must be at least that much larger than the original data.
    self.assertEqual(len(aggregate), 8)
    # no overlap between users at the daily level
    # bunch of intermediate entries with no users since this binning works
    # by range
    self.assertEqual([entry.nUsers for entry in aggregate],
                     [1, 1, 0, 0, 0, 0, 1, 1])
    # If there are no users, there are no values for any of the fields
    # since these are never negative, it implies that their sum is zero
    # NOTE(review): the per-user assertions above use WALKING and BUS, but
    # this check still names IN_VEHICLE - confirm whether BUS was intended.
    empty_bin = aggregate[2]
    self.assertTrue(all(mode not in empty_bin
                        for mode in ('WALKING', 'BICYCLING', 'IN_VEHICLE')))