def post(self):
    dao = MetricsBucketDao()
    resource = request.get_data()
    if resource:
        resource_json = json.loads(resource)
        start_date_str = resource_json.get('start_date')
        end_date_str = resource_json.get('end_date')
        if not start_date_str or not end_date_str:
            raise BadRequest("Start date and end date should not be empty")
        try:
            start_date = datetime.datetime.strptime(
                start_date_str, DATE_FORMAT).date()
        except ValueError:
            raise BadRequest("Invalid start date: %s" % start_date_str)
        try:
            end_date = datetime.datetime.strptime(end_date_str,
                                                  DATE_FORMAT).date()
        except ValueError:
            raise BadRequest("Invalid end date: %s" % end_date_str)
        date_diff = abs((end_date - start_date).days)
        if date_diff > DAYS_LIMIT:
            raise BadRequest("Difference between start date and end date "
                             "should not be greater than %s days" % DAYS_LIMIT)
        buckets = dao.get_active_buckets(start_date, end_date)
        if buckets is None:
            return []
        return [dao.to_client_json(bucket) for bucket in buckets]
    else:
        raise BadRequest("Request data is empty")
def setUp(self):
    super(MetricsApiTest, self).setUp()
    self.version_dao = MetricsVersionDao()
    self.bucket_dao = MetricsBucketDao()
    self.today = datetime.date.today()
    self.tomorrow = self.today + datetime.timedelta(days=1)
    self.expected_bucket_1 = {
        'facets': {
            'date': self.today.isoformat()
        },
        'entries': {
            'x': 'a'
        }
    }
    self.expected_bucket_2 = {
        'facets': {
            'date': self.today.isoformat(),
            'hpoId': 'PITT'
        },
        'entries': {
            'x': 'b'
        }
    }
    self.expected_bucket_3 = {
        'facets': {
            'date': self.tomorrow.isoformat()
        },
        'entries': {
            'y': 'c'
        }
    }
def reduce_hpo_date_metric_counts_to_database_buckets(reducer_key, reducer_values, version_id=None):
  """Emits a metrics bucket with counts for metrics for a given hpoId + date to SQL
  Args:
     reducer_key: hpoId|date ('*' for hpoId for cross-HPO counts)
     reducer_values: list of participant_type|metric|count strings
  """
  metrics_dict = collections.defaultdict(int)
  (hpo_id, date_str) = parse_tuple(reducer_key)
  if hpo_id == '*':
    hpo_id = ''
  date = datetime.strptime(date_str, DATE_FORMAT)
  for reducer_value in reducer_values:
    (participant_type, metric_key, count) = parse_tuple(reducer_value)
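    # Bare participant counts are only tallied for registered participants;
    # all other metrics are prefixed with the participant kind they apply to.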
    if metric_key == PARTICIPANT_KIND:
      if participant_type == _REGISTERED_PARTICIPANT:
        metrics_dict[metric_key] += int(count)
    else:
      kind = FULL_PARTICIPANT_KIND if participant_type == _FULL_PARTICIPANT else PARTICIPANT_KIND
      metrics_dict['%s.%s' % (kind, metric_key)] += int(count)

  version_id = version_id or context.get().mapreduce_spec.mapper.params.get('version_id')
  bucket = MetricsBucket(metricsVersionId=version_id,
                         date=date,
                         hpoId=hpo_id,
                         metrics=json.dumps(metrics_dict))
  # Use upsert here; when reducer shards retry, we will just replace any metrics bucket that was
  # written before, rather than failing.
  MetricsBucketDao().upsert(bucket)
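To make the wire format concrete, here is a hypothetical pair of reducer inputs, assuming parse_tuple splits its argument on the '|' delimiter described in the docstring (the metric strings and participant-type values below are illustrative, not taken from the source):

# Hypothetical reducer inputs; using '*' as the hpoId would produce a
# cross-HPO bucket instead.
reducer_key = 'PITT|2017-01-01'
reducer_values = [
    'REGISTERED_PARTICIPANT|Participant|5',
    'FULL_PARTICIPANT|Participant.race|2',
]
reduce_hpo_date_metric_counts_to_database_buckets(
    reducer_key, reducer_values, version_id=1)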
Example #4
def post(self):
  dao = MetricsBucketDao()
  resource = request.get_data()
  start_date = None
  end_date = None
  if resource:
    resource_json = json.loads(resource)
    start_date_str = resource_json.get('start_date')
    end_date_str = resource_json.get('end_date')
    if start_date_str:
      try:
        start_date = datetime.datetime.strptime(start_date_str, DATE_FORMAT).date()
      except ValueError:
        raise BadRequest("Invalid start date: %s" % start_date_str)
    if end_date_str:
      try:
        end_date = datetime.datetime.strptime(end_date_str, DATE_FORMAT).date()
      except ValueError:
        raise BadRequest("Invalid end date: %s" % end_date_str)
  buckets = dao.get_active_buckets(start_date, end_date)
  if buckets is None:
    return []
  return [dao.to_client_json(bucket) for bucket in buckets]
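Unlike the stricter variant above, this handler treats both dates as optional, so missing bounds pass through as None. A hypothetical direct call against the DAO with no bounds:

# With no payload, both bounds stay None and every active bucket is returned
# (get_active_buckets is also called with no arguments in the DAO tests below).
buckets = MetricsBucketDao().get_active_buckets(None, None)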
class MetricsApiTest(FlaskTestBase):

  def setUp(self):
    super(MetricsApiTest, self).setUp()
    self.version_dao = MetricsVersionDao()
    self.bucket_dao = MetricsBucketDao()
    self.today = datetime.date.today()
    self.tomorrow = self.today + datetime.timedelta(days=1)
    self.expected_bucket_1 = {'facets': {'date': self.today.isoformat()},
                              'entries': {'x': 'a'}}
    self.expected_bucket_2 = {'facets': {'date': self.today.isoformat(),
                                         'hpoId': 'PITT'},
                              'entries': {'x': 'b'}}
    self.expected_bucket_3 = {'facets': {'date': self.tomorrow.isoformat()},
                              'entries': {'y': 'c'}}

  def test_get_metrics_no_data(self):
    response = self.send_post('Metrics', {'start_date': self.today.isoformat(),
                                          'end_date': self.tomorrow.isoformat()})
    self.assertEquals([], response)

  def test_get_metrics_no_buckets(self):
    self.version_dao.set_pipeline_in_progress()
    self.version_dao.set_pipeline_finished(True)
    response = self.send_post('Metrics', {'start_date': self.today.isoformat(),
                                          'end_date': self.tomorrow.isoformat()})
    self.assertEquals([], response)

  def setup_buckets(self):
    self.version_dao.set_pipeline_in_progress()
    metrics_bucket_1 = MetricsBucket(metricsVersionId=1, date=self.today, hpoId='',
                                     metrics='{ "x": "a" }')
    metrics_bucket_2 = MetricsBucket(metricsVersionId=1, date=self.today, hpoId='PITT',
                                     metrics='{ "x": "b" }')
    metrics_bucket_3 = MetricsBucket(metricsVersionId=1, date=self.tomorrow, hpoId='',
                                     metrics='{ "y": "c" }')
    self.bucket_dao.insert(metrics_bucket_1)
    self.bucket_dao.insert(metrics_bucket_2)
    self.bucket_dao.insert(metrics_bucket_3)
    self.version_dao.set_pipeline_finished(True)

  def test_get_metrics_with_buckets_and_today_start_date_tomorrow_end_date(self):
    self.setup_buckets()
    response = self.send_post('Metrics', {'start_date': self.today.isoformat(),
                                          'end_date': self.tomorrow.isoformat()})
    self.assertEquals([self.expected_bucket_1, self.expected_bucket_2,
                       self.expected_bucket_3], response)

  def test_get_metrics_with_buckets_and_today_start_end_date(self):
    self.setup_buckets()
    response = self.send_post('Metrics', {'start_date': self.today.isoformat(),
                                          'end_date': self.today.isoformat()})
    self.assertEquals([self.expected_bucket_1, self.expected_bucket_2], response)

  def test_get_metrics_with_buckets_and_tomorrow_start_end_date(self):
    self.setup_buckets()
    response = self.send_post('Metrics', {'start_date': self.tomorrow.isoformat(),
                                          'end_date': self.tomorrow.isoformat()})
    self.assertEquals([self.expected_bucket_3], response)

  def test_get_metrics_with_no_buckets_and_tomorrow_start_date_today_end_date(self):
    self.setup_buckets()
    response = self.send_post('Metrics', {'start_date': self.tomorrow.isoformat(),
                                          'end_date': self.today.isoformat()})
    self.assertEquals([], response)
Example #7
class MetricsDaoTest(SqlTestBase):
    def setUp(self):
        super(MetricsDaoTest, self).setUp()
        self.metrics_version_dao = MetricsVersionDao()
        self.metrics_bucket_dao = MetricsBucketDao()

    def test_get_before_insert(self):
        self.assertIsNone(self.metrics_version_dao.get(1))
        self.assertIsNone(self.metrics_version_dao.get_with_children(1))
        self.assertIsNone(self.metrics_bucket_dao.get([1, TIME, None]))
        self.assertIsNone(self.metrics_bucket_dao.get([1, TIME, PITT]))
        self.assertIsNone(self.metrics_version_dao.get_version_in_progress())
        self.assertIsNone(self.metrics_version_dao.get_serving_version())
        self.assertIsNone(self.metrics_bucket_dao.get_active_buckets())

    def test_set_pipeline_in_progress(self):
        with FakeClock(TIME):
            self.metrics_version_dao.set_pipeline_in_progress()
        expected_mv = MetricsVersion(metricsVersionId=1,
                                     inProgress=True,
                                     complete=False,
                                     date=TIME,
                                     dataVersion=SERVING_METRICS_DATA_VERSION)
        self.assertEquals(expected_mv.asdict(),
                          self.metrics_version_dao.get(1).asdict())
        self.assertEquals(
            expected_mv.asdict(),
            self.metrics_version_dao.get_version_in_progress().asdict())
        self.assertIsNone(self.metrics_version_dao.get_serving_version())
        self.assertIsNone(self.metrics_bucket_dao.get_active_buckets())

    def test_set_pipeline_in_progress_while_in_progress(self):
        with FakeClock(TIME):
            self.metrics_version_dao.set_pipeline_in_progress()

        with FakeClock(TIME_2):
            with self.assertRaises(PreconditionFailed):
                # Until a day passes, setting the pipeline in progress will raise an error.
                self.metrics_version_dao.set_pipeline_in_progress()

        # After a day passes, break the lock.
        with FakeClock(TIME_3):
            self.metrics_version_dao.set_pipeline_in_progress()
        expected_mv = MetricsVersion(metricsVersionId=1,
                                     inProgress=False,
                                     complete=False,
                                     date=TIME,
                                     dataVersion=SERVING_METRICS_DATA_VERSION)
        self.assertEquals(expected_mv.asdict(),
                          self.metrics_version_dao.get(1).asdict())
        expected_mv2 = MetricsVersion(metricsVersionId=2,
                                      inProgress=True,
                                      complete=False,
                                      date=TIME_3,
                                      dataVersion=SERVING_METRICS_DATA_VERSION)
        self.assertEquals(expected_mv2.asdict(),
                          self.metrics_version_dao.get(2).asdict())

    def test_set_pipeline_finished_not_in_progress(self):
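        # Should not raise, even though no pipeline run is in progress.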
        self.metrics_version_dao.set_pipeline_finished(True)

    def test_set_pipeline_finished_in_progress_no_buckets(self):
        with FakeClock(TIME):
            self.metrics_version_dao.set_pipeline_in_progress()
        with FakeClock(TIME_2):
            self.metrics_version_dao.set_pipeline_finished(True)
        expected_mv = MetricsVersion(metricsVersionId=1,
                                     inProgress=False,
                                     complete=True,
                                     date=TIME,
                                     dataVersion=SERVING_METRICS_DATA_VERSION)
        self.assertEquals(expected_mv.asdict(),
                          self.metrics_version_dao.get(1).asdict())
        self.assertEquals([], self.metrics_bucket_dao.get_active_buckets())

    def test_set_pipeline_finished_in_progress_with_buckets(self):
        with FakeClock(TIME):
            self.metrics_version_dao.set_pipeline_in_progress()
        metrics_bucket_1 = MetricsBucket(metricsVersionId=1,
                                         date=datetime.date.today(),
                                         hpoId='',
                                         metrics='foo')
        metrics_bucket_2 = MetricsBucket(metricsVersionId=1,
                                         date=datetime.date.today(),
                                         hpoId=PITT,
                                         metrics='bar')
        tomorrow = datetime.date.today() + datetime.timedelta(days=1)
        metrics_bucket_3 = MetricsBucket(metricsVersionId=1,
                                         date=tomorrow,
                                         hpoId='',
                                         metrics='baz')
        self.metrics_bucket_dao.insert(metrics_bucket_1)
        self.metrics_bucket_dao.insert(metrics_bucket_2)
        self.metrics_bucket_dao.insert(metrics_bucket_3)
        with FakeClock(TIME_2):
            self.metrics_version_dao.set_pipeline_finished(True)
        expected_mv = MetricsVersion(metricsVersionId=1,
                                     inProgress=False,
                                     complete=True,
                                     date=TIME,
                                     dataVersion=SERVING_METRICS_DATA_VERSION)
        self.assertEquals(
            expected_mv.asdict(),
            self.metrics_version_dao.get_serving_version().asdict())
        active_buckets = self.metrics_bucket_dao.get_active_buckets()
        self.assertEquals(3, len(active_buckets))
        self.assertEquals(metrics_bucket_1.asdict(),
                          active_buckets[0].asdict())
        self.assertEquals(metrics_bucket_2.asdict(),
                          active_buckets[1].asdict())
        self.assertEquals(metrics_bucket_3.asdict(),
                          active_buckets[2].asdict())

        # Filter on start date.
        active_buckets = self.metrics_bucket_dao.get_active_buckets(
            start_date=datetime.date.today())
        self.assertEquals(3, len(active_buckets))
        self.assertEquals(metrics_bucket_1.asdict(),
                          active_buckets[0].asdict())
        self.assertEquals(metrics_bucket_2.asdict(),
                          active_buckets[1].asdict())
        self.assertEquals(metrics_bucket_3.asdict(),
                          active_buckets[2].asdict())

        active_buckets = self.metrics_bucket_dao.get_active_buckets(
            start_date=tomorrow)
        self.assertEquals(1, len(active_buckets))
        self.assertEquals(metrics_bucket_3.asdict(),
                          active_buckets[0].asdict())

        # Filter on end date.
        active_buckets = self.metrics_bucket_dao.get_active_buckets(
            end_date=tomorrow)
        self.assertEquals(3, len(active_buckets))
        self.assertEquals(metrics_bucket_1.asdict(),
                          active_buckets[0].asdict())
        self.assertEquals(metrics_bucket_2.asdict(),
                          active_buckets[1].asdict())
        self.assertEquals(metrics_bucket_3.asdict(),
                          active_buckets[2].asdict())

        active_buckets = self.metrics_bucket_dao.get_active_buckets(
            end_date=datetime.date.today())
        self.assertEquals(2, len(active_buckets))
        self.assertEquals(metrics_bucket_1.asdict(),
                          active_buckets[0].asdict())
        self.assertEquals(metrics_bucket_2.asdict(),
                          active_buckets[1].asdict())

    def test_insert_duplicate_bucket(self):
        with FakeClock(TIME):
            self.metrics_version_dao.set_pipeline_in_progress()
        metrics_bucket_1 = MetricsBucket(metricsVersionId=1,
                                         date=datetime.date.today(),
                                         hpoId=PITT,
                                         metrics='foo')
        metrics_bucket_2 = MetricsBucket(metricsVersionId=1,
                                         date=datetime.date.today(),
                                         hpoId=PITT,
                                         metrics='bar')
        self.metrics_bucket_dao.insert(metrics_bucket_1)
        with self.assertRaises(IntegrityError):
            self.metrics_bucket_dao.insert(metrics_bucket_2)

        # Upsert should work, and replace the bucket.
        self.metrics_bucket_dao.upsert(metrics_bucket_2)
        self.assertEquals(
            metrics_bucket_2.asdict(),
            self.metrics_bucket_dao.get([1, datetime.date.today(),
                                         PITT]).asdict())

    def test_delete_old_metrics(self):
        with FakeClock(TIME):
            self.metrics_version_dao.set_pipeline_in_progress()
        metrics_bucket_1 = MetricsBucket(metricsVersionId=1,
                                         date=datetime.date.today(),
                                         hpoId='',
                                         metrics='foo')
        metrics_bucket_2 = MetricsBucket(metricsVersionId=1,
                                         date=datetime.date.today(),
                                         hpoId=PITT,
                                         metrics='bar')
        self.metrics_bucket_dao.insert(metrics_bucket_1)
        self.metrics_bucket_dao.insert(metrics_bucket_2)

        # For up to 3 days, the metrics stay around.
        with FakeClock(TIME_4):
            self.metrics_version_dao.delete_old_versions()
            expected_mv = MetricsVersion(
                metricsVersionId=1,
                inProgress=True,
                complete=False,
                date=TIME,
                dataVersion=SERVING_METRICS_DATA_VERSION)
            expected_mv.buckets.append(metrics_bucket_1)
            expected_mv.buckets.append(metrics_bucket_2)
            self.assertEquals(
                expected_mv.asdict(follow=['buckets']),
                self.metrics_version_dao.get_with_children(1).asdict(
                    follow=['buckets']))

        # After 3 days, the metrics are gone.
        with FakeClock(TIME_5):
            self.metrics_version_dao.delete_old_versions()
            self.assertIsNone(self.metrics_version_dao.get_with_children(1))