# Shared setup for the test methods below. Import paths follow Apache
# Superset's module layout at the time these tests were written; adjust
# them for your environment. The enclosing test-case class name is
# illustrative.
import json
import unittest

from unittest.mock import Mock

from superset.connectors.druid.models import (
    DruidColumn,
    DruidDatasource,
    DruidMetric,
)


class DruidDatasourceTestCase(unittest.TestCase):

    def test_run_query_with_adhoc_metric(self):
        client = Mock()
        from_dttm = Mock()
        to_dttm = Mock()
        from_dttm.replace = Mock(return_value=from_dttm)
        to_dttm.replace = Mock(return_value=to_dttm)
        from_dttm.isoformat = Mock(return_value='from')
        to_dttm.isoformat = Mock(return_value='to')
        timezone = 'timezone'
        from_dttm.tzname = Mock(return_value=timezone)
        ds = DruidDatasource(datasource_name='datasource')
        metric1 = DruidMetric(metric_name='metric1')
        metric2 = DruidMetric(metric_name='metric2')
        ds.metrics = [metric1, metric2]
        col1 = DruidColumn(column_name='col1')
        col2 = DruidColumn(column_name='col2')
        ds.columns = [col1, col2]
        all_metrics = []
        post_aggs = ['some_agg']
        ds._metrics_and_post_aggs = Mock(return_value=(all_metrics, post_aggs))
        groupby = []
        metrics = [{
            'expressionType': 'SIMPLE',
            'column': {
                'type': 'DOUBLE',
                'column_name': 'col1'
            },
            'aggregate': 'SUM',
            'label': 'My Adhoc Metric',
        }]

        ds.get_having_filters = Mock(return_value=[])
        client.query_builder = Mock()
        client.query_builder.last_query = Mock()
        client.query_builder.last_query.query_dict = {'mock': 0}
        # with no groupby, run_query should call client.timeseries
        ds.run_query(
            groupby,
            metrics,
            None,
            from_dttm,
            to_dttm,
            client=client,
            filter=[],
            row_limit=100,
        )
        self.assertEqual(0, len(client.topn.call_args_list))
        self.assertEqual(0, len(client.groupby.call_args_list))
        self.assertEqual(1, len(client.timeseries.call_args_list))
        # check that there is no dimensions entry
        called_args = client.timeseries.call_args_list[0][1]
        self.assertNotIn('dimensions', called_args)
        self.assertIn('post_aggregations', called_args)
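
    # Illustrative sketch (an assumption, not Superset's actual helper):
    # what the SIMPLE adhoc metric above amounts to as a native Druid
    # aggregation. The aggregator type is derived from the aggregate
    # function plus the column type, and the label becomes the output name.
    @staticmethod
    def _adhoc_metric_to_aggregation(adhoc_metric):
        type_map = {
            ('SUM', 'DOUBLE'): 'doubleSum',
            ('SUM', 'LONG'): 'longSum',
            ('MIN', 'DOUBLE'): 'doubleMin',
            ('MAX', 'DOUBLE'): 'doubleMax',
        }
        column = adhoc_metric['column']
        return {
            'type': type_map[(adhoc_metric['aggregate'], column['type'])],
            'name': adhoc_metric['label'],
            'fieldName': column['column_name'],
        }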

    def test_run_query_multiple_groupby(self):
        client = Mock()
        from_dttm = Mock()
        to_dttm = Mock()
        from_dttm.replace = Mock(return_value=from_dttm)
        to_dttm.replace = Mock(return_value=to_dttm)
        from_dttm.isoformat = Mock(return_value='from')
        to_dttm.isoformat = Mock(return_value='to')
        timezone = 'timezone'
        from_dttm.tzname = Mock(return_value=timezone)
        ds = DruidDatasource(datasource_name='datasource')
        metric1 = DruidMetric(metric_name='metric1')
        metric2 = DruidMetric(metric_name='metric2')
        ds.metrics = [metric1, metric2]
        col1 = DruidColumn(column_name='col1')
        col2 = DruidColumn(column_name='col2')
        ds.columns = [col1, col2]
        aggs = []
        post_aggs = ['some_agg']
        ds._metrics_and_post_aggs = Mock(return_value=(aggs, post_aggs))
        groupby = ['col1', 'col2']
        metrics = ['metric1']
        ds.get_having_filters = Mock(return_value=[])
        client.query_builder = Mock()
        client.query_builder.last_query = Mock()
        client.query_builder.last_query.query_dict = {'mock': 0}
        # multiple groupby columns should call client.groupby
        ds.run_query(
            groupby,
            metrics,
            None,
            from_dttm,
            to_dttm,
            client=client,
            row_limit=100,
            filter=[],
        )
        self.assertEqual(0, len(client.topn.call_args_list))
        self.assertEqual(1, len(client.groupby.call_args_list))
        self.assertEqual(0, len(client.timeseries.call_args_list))
        # check that the dimensions entry carries both groupby columns
        called_args = client.groupby.call_args_list[0][1]
        self.assertIn('dimensions', called_args)
        self.assertEqual(['col1', 'col2'], called_args['dimensions'])
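
    # Illustrative sketch (an assumption, not Superset's actual code): how
    # a list of groupby column names could become the 'dimensions' kwarg
    # asserted above, honoring a column's dimension_spec_json when one is
    # set; the single-groupby test below relies on exactly that.
    @staticmethod
    def _columns_to_dimensions(columns_by_name, groupby):
        dimensions = []
        for name in groupby:
            col = columns_by_name.get(name)
            spec_json = getattr(col, 'dimension_spec_json', None)
            # a column with an explicit dimension spec contributes the
            # parsed spec; otherwise the bare column name is used
            dimensions.append(json.loads(spec_json) if spec_json else name)
        return dimensions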

    def test_run_query_single_groupby(self):
        client = Mock()
        from_dttm = Mock()
        to_dttm = Mock()
        from_dttm.replace = Mock(return_value=from_dttm)
        to_dttm.replace = Mock(return_value=to_dttm)
        from_dttm.isoformat = Mock(return_value='from')
        to_dttm.isoformat = Mock(return_value='to')
        timezone = 'timezone'
        from_dttm.tzname = Mock(return_value=timezone)
        ds = DruidDatasource(datasource_name='datasource')
        metric1 = DruidMetric(metric_name='metric1')
        metric2 = DruidMetric(metric_name='metric2')
        ds.metrics = [metric1, metric2]
        col1 = DruidColumn(column_name='col1')
        col2 = DruidColumn(column_name='col2')
        ds.columns = [col1, col2]
        aggs = ['metric1']
        post_aggs = ['some_agg']
        ds._metrics_and_post_aggs = Mock(return_value=(aggs, post_aggs))
        groupby = ['col1']
        metrics = ['metric1']
        ds.get_having_filters = Mock(return_value=[])
        client.query_builder.last_query.query_dict = {'mock': 0}
        # a single groupby with order_desc and a timeseries_limit issues a
        # pre-query, so client.topn is called twice
        ds.run_query(
            groupby,
            metrics,
            None,
            from_dttm,
            to_dttm,
            timeseries_limit=100,
            client=client,
            order_desc=True,
            filter=[],
        )
        self.assertEqual(2, len(client.topn.call_args_list))
        self.assertEqual(0, len(client.groupby.call_args_list))
        self.assertEqual(0, len(client.timeseries.call_args_list))
        # the pre-query passes 'dimension' (singular), never 'dimensions'
        called_args_pre = client.topn.call_args_list[0][1]
        self.assertNotIn('dimensions', called_args_pre)
        self.assertIn('dimension', called_args_pre)
        called_args = client.topn.call_args_list[1][1]
        self.assertIn('dimension', called_args)
        self.assertEqual('col1', called_args['dimension'])
        # without order_desc, a single groupby falls back to client.groupby
        client = Mock()
        client.query_builder.last_query.query_dict = {'mock': 0}
        ds.run_query(
            groupby,
            metrics,
            None,
            from_dttm,
            to_dttm,
            client=client,
            order_desc=False,
            filter=[],
            row_limit=100,
        )
        self.assertEqual(0, len(client.topn.call_args_list))
        self.assertEqual(1, len(client.groupby.call_args_list))
        self.assertEqual(0, len(client.timeseries.call_args_list))
        self.assertIn('dimensions', client.groupby.call_args_list[0][1])
        self.assertEqual(['col1'],
                         client.groupby.call_args_list[0][1]['dimensions'])
        # order_desc with a timeseries_limit and a dimension spec still
        # calls topn, with the spec passed through the 'dimension' kwarg
        spec = {'outputName': 'hello', 'dimension': 'matcho'}
        spec_json = json.dumps(spec)
        col3 = DruidColumn(column_name='col3', dimension_spec_json=spec_json)
        ds.columns.append(col3)
        groupby = ['col3']
        client = Mock()
        client.query_builder.last_query.query_dict = {'mock': 0}
        ds.run_query(
            groupby,
            metrics,
            None,
            from_dttm,
            to_dttm,
            client=client,
            order_desc=True,
            timeseries_limit=5,
            filter=[],
            row_limit=100,
        )
        self.assertEqual(2, len(client.topn.call_args_list))
        self.assertEqual(0, len(client.groupby.call_args_list))
        self.assertEqual(0, len(client.timeseries.call_args_list))
        self.assertIn('dimension', client.topn.call_args_list[0][1])
        self.assertIn('dimension', client.topn.call_args_list[1][1])
        # the pre-query uses the plain dimension name; the final query
        # uses the full dimension spec
        self.assertEqual('matcho',
                         client.topn.call_args_list[0][1]['dimension'])
        self.assertEqual(spec, client.topn.call_args_list[1][1]['dimension'])
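
    # Taken together, the run_query tests above pin down a dispatch rule
    # for the query type. A minimal sketch of that rule, as an illustration
    # of the asserted behavior rather than Superset's actual implementation:
    @staticmethod
    def _choose_query_type(groupby, order_desc=True):
        if not groupby:
            # no dimensions: a plain timeseries query
            return 'timeseries'
        if len(groupby) == 1 and order_desc:
            # one dimension ordered by metric: topn (issued twice when a
            # timeseries_limit triggers a pre-query)
            return 'topn'
        # several dimensions, or an unordered single dimension: groupby
        return 'groupby'
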
    def test_metrics_and_post_aggs(self):
        """
        Test generation of metrics and post-aggregations from an initial
        list of Superset metrics (which may include the results of either).
        This primarily checks that requesting a post-aggregator metric also
        pulls in the raw aggregation of the associated Druid metric column.
        """
        metrics_dict = {
            'unused_count': DruidMetric(
                metric_name='unused_count',
                verbose_name='COUNT(*)',
                metric_type='count',
                json=json.dumps({'type': 'count', 'name': 'unused_count'}),
            ),
            'some_sum': DruidMetric(
                metric_name='some_sum',
                verbose_name='SUM(*)',
                metric_type='sum',
                json=json.dumps({'type': 'sum', 'name': 'sum'}),
            ),
            'a_histogram': DruidMetric(
                metric_name='a_histogram',
                verbose_name='APPROXIMATE_HISTOGRAM(*)',
                metric_type='approxHistogramFold',
                json=json.dumps(
                    {'type': 'approxHistogramFold', 'name': 'a_histogram'},
                ),
            ),
            'aCustomMetric': DruidMetric(
                metric_name='aCustomMetric',
                verbose_name='MY_AWESOME_METRIC(*)',
                metric_type='aCustomType',
                json=json.dumps(
                    {'type': 'customMetric', 'name': 'aCustomMetric'},
                ),
            ),
            'quantile_p95': DruidMetric(
                metric_name='quantile_p95',
                verbose_name='P95(*)',
                metric_type='postagg',
                json=json.dumps({
                    'type': 'quantile',
                    'probability': 0.95,
                    'name': 'p95',
                    'fieldName': 'a_histogram',
                }),
            ),
            'aCustomPostAgg': DruidMetric(
                metric_name='aCustomPostAgg',
                verbose_name='CUSTOM_POST_AGG(*)',
                metric_type='postagg',
                json=json.dumps({
                    'type': 'customPostAgg',
                    'name': 'aCustomPostAgg',
                    'field': {
                        'type': 'fieldAccess',
                        'fieldName': 'aCustomMetric',
                    },
                }),
            ),
        }

        metrics = ['some_sum']
        all_metrics, post_aggs = DruidDatasource._metrics_and_post_aggs(
            metrics, metrics_dict)

        assert all_metrics == ['some_sum']
        assert post_aggs == {}

        metrics = ['quantile_p95']
        all_metrics, post_aggs = DruidDatasource._metrics_and_post_aggs(
            metrics, metrics_dict)

        result_postaggs = set(['quantile_p95'])
        assert all_metrics == ['a_histogram']
        assert set(post_aggs.keys()) == result_postaggs

        metrics = ['aCustomPostAgg']
        all_metrics, post_aggs = DruidDatasource._metrics_and_post_aggs(
            metrics, metrics_dict)

        result_postaggs = set(['aCustomPostAgg'])
        assert all_metrics == ['aCustomMetric']
        assert set(post_aggs.keys()) == result_postaggs
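
    # Illustrative sketch (an assumption, not Superset's actual
    # implementation): the dependency walk the assertions above describe.
    # Requesting a post-aggregator also requires the raw aggregation it
    # reads, found through 'fieldName' or a nested 'fieldAccess' field.
    @staticmethod
    def _resolve_metrics(metric_names, metrics_dict):
        aggregations = []
        post_aggs = {}
        for name in metric_names:
            metric = metrics_dict[name]
            conf = json.loads(metric.json)
            if metric.metric_type == 'postagg':
                post_aggs[name] = conf
                # pull in whichever metric the post-aggregator reads
                field = conf.get('fieldName') or \
                    conf.get('field', {}).get('fieldName')
                if field in metrics_dict:
                    aggregations.append(field)
            else:
                aggregations.append(name)
        return aggregations, post_aggs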