def test_get_aggregations(self):
    """Check that ``get_aggregations`` only resolves plain aggregations.

    Requesting saved aggregation metrics returns their parsed JSON
    definitions keyed by metric name. Requesting a name that is not in
    the metrics dict, or a metric of type ``postagg``, must raise
    ``SupersetException``.
    """
    ds = DruidDatasource(datasource_name="datasource")

    def double_sum(name):
        # Saved ``doubleSum`` aggregation whose JSON carries the same name.
        return DruidMetric(
            metric_name=name,
            metric_type="doubleSum",
            json=json.dumps({"type": "doubleSum", "name": name}),
        )

    division = {
        "fn": "/",
        "type": "arithmetic",
        "name": "div1",
        "fields": [
            {"fieldName": "sum1", "type": "fieldAccess"},
            {"fieldName": "sum2", "type": "fieldAccess"},
        ],
    }
    metrics_dict = {
        "sum1": double_sum("sum1"),
        "sum2": double_sum("sum2"),
        "div1": DruidMetric(
            metric_name="div1",
            metric_type="postagg",
            json=json.dumps(division),
        ),
    }

    # Two plain aggregations: both definitions are expected back.
    wanted = ["sum1", "sum2"]
    aggs = ds.get_aggregations(metrics_dict, wanted)
    expected_agg = {}
    for name in wanted:
        expected_agg[name] = metrics_dict[name].json_obj
    self.assertEqual(expected_agg, aggs)

    # "col1" is not a key of metrics_dict.
    with self.assertRaises(SupersetException):
        ds.get_aggregations(metrics_dict, ["sum1", "col1"])

    # "div1" is a post-aggregation rather than an aggregation.
    with self.assertRaises(SupersetException):
        ds.get_aggregations(metrics_dict, ["sum1", "div1"])
Пример #2
0
    def test_get_aggregations(self):
        """Check that ``get_aggregations`` only resolves plain aggregations.

        Saved aggregation metrics come back as their parsed JSON definitions
        keyed by name, while an unknown metric name or a ``postagg`` metric
        must raise ``SupersetException``.
        """
        ds = DruidDatasource(datasource_name='datasource')

        def double_sum(name):
            # Saved ``doubleSum`` aggregation whose JSON carries the same name.
            return DruidMetric(
                metric_name=name,
                metric_type='doubleSum',
                json=json.dumps({'type': 'doubleSum', 'name': name}),
            )

        division = {
            'fn': '/',
            'type': 'arithmetic',
            'name': 'div1',
            'fields': [
                {'fieldName': 'sum1', 'type': 'fieldAccess'},
                {'fieldName': 'sum2', 'type': 'fieldAccess'},
            ],
        }
        metrics_dict = {
            'sum1': double_sum('sum1'),
            'sum2': double_sum('sum2'),
            'div1': DruidMetric(
                metric_name='div1',
                metric_type='postagg',
                json=json.dumps(division),
            ),
        }

        # Two plain aggregations: both definitions are expected back.
        wanted = ['sum1', 'sum2']
        aggs = ds.get_aggregations(metrics_dict, wanted)
        expected_agg = {}
        for name in wanted:
            expected_agg[name] = metrics_dict[name].json_obj
        self.assertEqual(expected_agg, aggs)

        # 'col1' is not a key of metrics_dict.
        with self.assertRaises(SupersetException):
            ds.get_aggregations(metrics_dict, ['sum1', 'col1'])

        # 'div1' is a post-aggregation rather than an aggregation.
        with self.assertRaises(SupersetException):
            ds.get_aggregations(metrics_dict, ['sum1', 'div1'])
    def test_run_query_with_adhoc_metric(self):
        """Run a query with one ad-hoc metric and no group-by columns.

        The mocked client must receive exactly one ``timeseries`` call (and
        no ``topn`` or ``groupby`` call) whose keyword arguments include
        ``post_aggregations`` but not ``dimensions``.
        """
        # Time bounds: mocks whose ``replace`` hands back the same object.
        from_dttm = Mock()
        from_dttm.replace = Mock(return_value=from_dttm)
        from_dttm.isoformat = Mock(return_value="from")
        from_dttm.tzname = Mock(return_value="timezone")
        to_dttm = Mock()
        to_dttm.replace = Mock(return_value=to_dttm)
        to_dttm.isoformat = Mock(return_value="to")

        ds = DruidDatasource(datasource_name="datasource")
        ds.metrics = [
            DruidMetric(metric_name=name) for name in ("metric1", "metric2")
        ]
        ds.columns = [
            DruidColumn(column_name=name) for name in ("col1", "col2")
        ]
        # Metric resolution and having-filters are stubbed out.
        ds._metrics_and_post_aggs = Mock(return_value=([], ["some_agg"]))
        ds.get_having_filters = Mock(return_value=[])

        client = Mock()
        client.query_builder = Mock()
        client.query_builder.last_query = Mock()
        client.query_builder.last_query.query_dict = {"mock": 0}

        adhoc_metric = {
            "expressionType": "SIMPLE",
            "column": {"type": "DOUBLE", "column_name": "col1"},
            "aggregate": "SUM",
            "label": "My Adhoc Metric",
        }

        # An empty groupby is expected to go through client.timeseries.
        ds.run_query(
            [],
            [adhoc_metric],
            None,
            from_dttm,
            to_dttm,
            client=client,
            filter=[],
            row_limit=100,
        )
        self.assertEqual(0, client.topn.call_count)
        self.assertEqual(0, client.groupby.call_count)
        self.assertEqual(1, client.timeseries.call_count)

        # The timeseries call carries post-aggregations but no dimensions.
        _, called_kwargs = client.timeseries.call_args_list[0]
        self.assertNotIn("dimensions", called_kwargs)
        self.assertIn("post_aggregations", called_kwargs)
Пример #4
0
    def test_run_query_with_adhoc_metric(self):
        """Run a query with one ad-hoc metric and no group-by columns.

        The mocked client must receive exactly one ``timeseries`` call (and
        no ``topn`` or ``groupby`` call) whose keyword arguments include
        ``post_aggregations`` but not ``dimensions``.
        """
        # Time bounds: mocks whose ``replace`` hands back the same object.
        from_dttm = Mock()
        from_dttm.replace = Mock(return_value=from_dttm)
        from_dttm.isoformat = Mock(return_value='from')
        from_dttm.tzname = Mock(return_value='timezone')
        to_dttm = Mock()
        to_dttm.replace = Mock(return_value=to_dttm)
        to_dttm.isoformat = Mock(return_value='to')

        ds = DruidDatasource(datasource_name='datasource')
        ds.metrics = [
            DruidMetric(metric_name=name) for name in ('metric1', 'metric2')
        ]
        ds.columns = [
            DruidColumn(column_name=name) for name in ('col1', 'col2')
        ]
        # Metric resolution and having-filters are stubbed out.
        ds._metrics_and_post_aggs = Mock(return_value=([], ['some_agg']))
        ds.get_having_filters = Mock(return_value=[])

        client = Mock()
        client.query_builder = Mock()
        client.query_builder.last_query = Mock()
        client.query_builder.last_query.query_dict = {'mock': 0}

        adhoc_metric = {
            'expressionType': 'SIMPLE',
            'column': {'type': 'DOUBLE', 'column_name': 'col1'},
            'aggregate': 'SUM',
            'label': 'My Adhoc Metric',
        }

        # An empty groupby is expected to go through client.timeseries.
        ds.run_query(
            [],
            [adhoc_metric],
            None,
            from_dttm,
            to_dttm,
            client=client,
            filter=[],
            row_limit=100,
        )
        self.assertEqual(0, client.topn.call_count)
        self.assertEqual(0, client.groupby.call_count)
        self.assertEqual(1, client.timeseries.call_count)

        # The timeseries call carries post-aggregations but no dimensions.
        _, called_kwargs = client.timeseries.call_args_list[0]
        self.assertNotIn('dimensions', called_kwargs)
        self.assertIn('post_aggregations', called_kwargs)
Пример #5
0
    def create_druid_datasource(self,
                                name,
                                id=0,
                                cols_names=None,
                                metric_names=None):
        """Build a ``DruidDatasource`` together with a dict describing it.

        Args:
            name: Datasource name; ``NAME_PREFIX`` is prepended to it.
            id: Identifier given to the datasource and stored in its
                ``params`` under the ``DBREF`` key.
            cols_names: Names of the columns to attach. ``None`` (the
                default) means no columns.
            metric_names: Names of the metrics to attach. ``None`` (the
                default) means no metrics.

        Returns:
            A ``(datasource, dict_rep)`` tuple. ``dict_rep`` lists each
            metric with ``'json': '{}'``, whereas the ``DruidMetric`` objects
            attached to the datasource are created without a ``json`` value.
        """
        # ``None`` sentinels avoid sharing one mutable default list between
        # calls.
        cols_names = [] if cols_names is None else cols_names
        metric_names = [] if metric_names is None else metric_names

        name = '{0}{1}'.format(NAME_PREFIX, name)
        cluster_name = 'druid_test'
        params_json = json.dumps({DBREF: id, 'database_name': cluster_name})
        dict_rep = {
            'cluster_name': cluster_name,
            'datasource_name': name,
            'id': id,
            'params': params_json,
            'columns': [{
                'column_name': c
            } for c in cols_names],
            'metrics': [{
                'metric_name': c,
                'json': '{}'
            } for c in metric_names],
        }

        datasource = DruidDatasource(
            id=id,
            datasource_name=name,
            cluster_name=cluster_name,
            params=params_json,
        )
        for col_name in cols_names:
            datasource.columns.append(DruidColumn(column_name=col_name))
        for metric_name in metric_names:
            datasource.metrics.append(DruidMetric(metric_name=metric_name))
        return datasource, dict_rep
Пример #6
0
def decode_dashboards(o: Dict[str, Any]) -> Any:
    """
    Object hook meant to be passed to ``json.loads`` (``object_hook``).

    Rebuilds model objects from their tagged JSON representation: a dict
    containing a key such as ``"__Dashboard__"`` is turned into the matching
    model by passing the tagged value as keyword arguments, and a
    ``"__datetime__"`` value is parsed with the ``%Y-%m-%dT%H:%M:%S`` format.
    Any other dict is returned unchanged.
    """
    from superset.connectors.druid.models import (
        DruidCluster,
        DruidColumn,
        DruidDatasource,
        DruidMetric,
    )

    # Tags are probed in this order; the first one present decides the result.
    tagged_models = (
        ("__Dashboard__", Dashboard),
        ("__Slice__", Slice),
        ("__TableColumn__", TableColumn),
        ("__SqlaTable__", SqlaTable),
        ("__SqlMetric__", SqlMetric),
        ("__DruidCluster__", DruidCluster),
        ("__DruidColumn__", DruidColumn),
        ("__DruidDatasource__", DruidDatasource),
        ("__DruidMetric__", DruidMetric),
    )
    for tag, model in tagged_models:
        if tag in o:
            return model(**o[tag])

    if "__datetime__" in o:
        return datetime.strptime(o["__datetime__"], "%Y-%m-%dT%H:%M:%S")

    return o
Пример #7
0
    def create_druid_datasource(self,
                                name,
                                id=0,
                                cols_names=None,
                                metric_names=None):
        """Build a ``DruidDatasource`` together with a dict describing it.

        Args:
            name: Datasource name; ``NAME_PREFIX`` is prepended to it.
            id: Identifier given to the datasource and stored in its
                ``params`` under the ``DBREF`` key.
            cols_names: Names of the columns to attach. ``None`` (the
                default) means no columns.
            metric_names: Names of the metrics to attach. ``None`` (the
                default) means no metrics.

        Returns:
            A ``(datasource, dict_rep)`` tuple. ``dict_rep`` lists each
            metric with ``"json": "{}"``, whereas the ``DruidMetric`` objects
            attached to the datasource are created without a ``json`` value.
        """
        # ``None`` sentinels avoid sharing one mutable default list between
        # calls.
        cols_names = [] if cols_names is None else cols_names
        metric_names = [] if metric_names is None else metric_names

        name = "{0}{1}".format(NAME_PREFIX, name)
        cluster_name = "druid_test"
        params_json = json.dumps({DBREF: id, "database_name": cluster_name})
        dict_rep = {
            "cluster_name": cluster_name,
            "datasource_name": name,
            "id": id,
            "params": params_json,
            "columns": [{
                "column_name": c
            } for c in cols_names],
            "metrics": [{
                "metric_name": c,
                "json": "{}"
            } for c in metric_names],
        }

        datasource = DruidDatasource(
            id=id,
            datasource_name=name,
            cluster_name=cluster_name,
            params=params_json,
        )
        for col_name in cols_names:
            datasource.columns.append(DruidColumn(column_name=col_name))
        for metric_name in metric_names:
            datasource.metrics.append(DruidMetric(metric_name=metric_name))
        return datasource, dict_rep
Пример #8
0
 def test_run_query_multiple_groupby(self):
     """Run a query grouped by two columns and inspect the client calls.

     The mocked client must receive exactly one ``groupby`` call (and no
     ``topn`` or ``timeseries`` call) whose ``dimensions`` keyword holds
     both group-by columns.
     """
     # Time bounds: mocks whose ``replace`` hands back the same object.
     from_dttm = Mock()
     from_dttm.replace = Mock(return_value=from_dttm)
     from_dttm.isoformat = Mock(return_value='from')
     from_dttm.tzname = Mock(return_value='timezone')
     to_dttm = Mock()
     to_dttm.replace = Mock(return_value=to_dttm)
     to_dttm.isoformat = Mock(return_value='to')

     ds = DruidDatasource(datasource_name='datasource')
     ds.metrics = [
         DruidMetric(metric_name=name) for name in ('metric1', 'metric2')
     ]
     ds.columns = [
         DruidColumn(column_name=name) for name in ('col1', 'col2')
     ]
     # Metric resolution and having-filters are stubbed out.
     ds._metrics_and_post_aggs = Mock(return_value=([], ['some_agg']))
     ds.get_having_filters = Mock(return_value=[])

     client = Mock()
     client.query_builder = Mock()
     client.query_builder.last_query = Mock()
     client.query_builder.last_query.query_dict = {'mock': 0}

     # Two group-by columns are expected to go through client.groupby.
     ds.run_query(
         ['col1', 'col2'],
         ['metric1'],
         None,
         from_dttm,
         to_dttm,
         client=client,
         row_limit=100,
         filter=[],
     )
     self.assertEqual(0, client.topn.call_count)
     self.assertEqual(1, client.groupby.call_count)
     self.assertEqual(0, client.timeseries.call_count)

     # The groupby call must list both columns as its dimensions.
     _, called_kwargs = client.groupby.call_args_list[0]
     self.assertIn('dimensions', called_kwargs)
     self.assertEqual(['col1', 'col2'], called_kwargs['dimensions'])
 def test_run_query_multiple_groupby(self):
     """Run a query grouped by two columns and inspect the client calls.

     The mocked client must receive exactly one ``groupby`` call (and no
     ``topn`` or ``timeseries`` call) whose ``dimensions`` keyword holds
     both group-by columns.
     """
     # Time bounds: mocks whose ``replace`` hands back the same object.
     from_dttm = Mock()
     from_dttm.replace = Mock(return_value=from_dttm)
     from_dttm.isoformat = Mock(return_value="from")
     from_dttm.tzname = Mock(return_value="timezone")
     to_dttm = Mock()
     to_dttm.replace = Mock(return_value=to_dttm)
     to_dttm.isoformat = Mock(return_value="to")

     ds = DruidDatasource(datasource_name="datasource")
     ds.metrics = [
         DruidMetric(metric_name=name) for name in ("metric1", "metric2")
     ]
     ds.columns = [
         DruidColumn(column_name=name) for name in ("col1", "col2")
     ]
     # Metric resolution and having-filters are stubbed out.
     ds._metrics_and_post_aggs = Mock(return_value=([], ["some_agg"]))
     ds.get_having_filters = Mock(return_value=[])

     client = Mock()
     client.query_builder = Mock()
     client.query_builder.last_query = Mock()
     client.query_builder.last_query.query_dict = {"mock": 0}

     # Two group-by columns are expected to go through client.groupby.
     ds.run_query(
         ["col1", "col2"],
         ["metric1"],
         None,
         from_dttm,
         to_dttm,
         client=client,
         row_limit=100,
         filter=[],
     )
     self.assertEqual(0, client.topn.call_count)
     self.assertEqual(1, client.groupby.call_count)
     self.assertEqual(0, client.timeseries.call_count)

     # The groupby call must list both columns as its dimensions.
     _, called_kwargs = client.groupby.call_args_list[0]
     self.assertIn("dimensions", called_kwargs)
     self.assertEqual(["col1", "col2"], called_kwargs["dimensions"])
Пример #10
0
 def create_druid_datasource(self, name, id=0, cols_names=None, metric_names=None):
     """Build a ``DruidDatasource`` in the ``druid_test`` cluster.

     Args:
         name: Datasource name.
         id: Identifier given to the datasource and stored in its
             ``params`` as ``remote_id``.
         cols_names: Names of the columns to attach. ``None`` (the
             default) means no columns.
         metric_names: Names of the metrics to attach, each created with
             ``json="{}"``. ``None`` (the default) means no metrics.

     Returns:
         The populated ``DruidDatasource`` instance.
     """
     # ``None`` sentinels avoid sharing one mutable default list between
     # calls.
     cols_names = [] if cols_names is None else cols_names
     metric_names = [] if metric_names is None else metric_names

     params = {"remote_id": id, "database_name": "druid_test"}
     datasource = DruidDatasource(
         id=id,
         datasource_name=name,
         cluster_name="druid_test",
         params=json.dumps(params),
     )
     for col_name in cols_names:
         datasource.columns.append(DruidColumn(column_name=col_name))
     for metric_name in metric_names:
         datasource.metrics.append(DruidMetric(metric_name=metric_name, json="{}"))
     return datasource
Пример #11
0
 def create_druid_datasource(
         self, name, id=0, cols_names=None, metric_names=None):
     """Build a ``DruidDatasource`` in the ``druid_test`` cluster.

     Args:
         name: Datasource name.
         id: Identifier given to the datasource and stored in its
             ``params`` as ``remote_id``.
         cols_names: Names of the columns to attach. ``None`` (the
             default) means no columns.
         metric_names: Names of the metrics to attach. ``None`` (the
             default) means no metrics.

     Returns:
         The populated ``DruidDatasource`` instance.
     """
     # ``None`` sentinels avoid sharing one mutable default list between
     # calls.
     cols_names = [] if cols_names is None else cols_names
     metric_names = [] if metric_names is None else metric_names

     params = {'remote_id': id, 'database_name': 'druid_test'}
     datasource = DruidDatasource(
         id=id,
         datasource_name=name,
         cluster_name='druid_test',
         params=json.dumps(params)
     )
     for col_name in cols_names:
         datasource.columns.append(
             DruidColumn(column_name=col_name))
     for metric_name in metric_names:
         datasource.metrics.append(DruidMetric(
             metric_name=metric_name))
     return datasource
    def create_druid_datasource(self, name, id=0, cols_names=None, metric_names=None):
        """Build a ``DruidDatasource`` attached to the ``druid_test`` cluster.

        The cluster is obtained through ``self.get_or_create`` using
        ``db.session``, and its ``id`` is used as the datasource's
        ``cluster_id``.

        Args:
            name: Datasource name.
            id: Identifier given to the datasource and stored in its
                ``params`` as ``remote_id``.
            cols_names: Names of the columns to attach. ``None`` (the
                default) means no columns.
            metric_names: Names of the metrics to attach, each created with
                ``json="{}"``. ``None`` (the default) means no metrics.

        Returns:
            The populated ``DruidDatasource`` instance.
        """
        # ``None`` sentinels avoid sharing one mutable default list between
        # calls.
        cols_names = [] if cols_names is None else cols_names
        metric_names = [] if metric_names is None else metric_names

        cluster_name = "druid_test"
        cluster = self.get_or_create(
            DruidCluster, {"cluster_name": cluster_name}, db.session
        )

        params = {"remote_id": id, "database_name": cluster_name}
        datasource = DruidDatasource(
            id=id,
            datasource_name=name,
            cluster_id=cluster.id,
            params=json.dumps(params),
        )
        for col_name in cols_names:
            datasource.columns.append(DruidColumn(column_name=col_name))
        for metric_name in metric_names:
            datasource.metrics.append(DruidMetric(metric_name=metric_name, json="{}"))
        return datasource
    def test_run_query_order_by_metrics(self):
        """Check the queries built when ordering by a limit metric.

        ``count1`` is always the requested metric. Ordering by the plain
        aggregation ``sum1`` must add ``sum1`` to the aggregations; ordering
        by the post-aggregation ``div1`` must add ``div1`` as a
        post-aggregation plus the ``sum1`` and ``sum2`` aggregations. This is
        checked for a single dimension (``topn`` calls) and for two
        dimensions (``groupby`` calls).
        """
        client = Mock()
        client.query_builder.last_query.query_dict = {"mock": 0}
        from_dttm = Mock()
        to_dttm = Mock()
        ds = DruidDatasource(datasource_name="datasource")
        ds.get_having_filters = Mock(return_value=[])
        dim1 = DruidColumn(column_name="dim1")
        dim2 = DruidColumn(column_name="dim2")

        def saved_agg(name, agg_type):
            # Saved aggregation whose JSON repeats its name and type.
            return DruidMetric(
                metric_name=name,
                metric_type=agg_type,
                json=json.dumps({"type": agg_type, "name": name}),
            )

        division = {
            "fn": "/",
            "type": "arithmetic",
            "name": "div1",
            "fields": [
                {"fieldName": "sum1", "type": "fieldAccess"},
                {"fieldName": "sum2", "type": "fieldAccess"},
            ],
        }
        metrics_dict = {
            "count1": saved_agg("count1", "count"),
            "sum1": saved_agg("sum1", "doubleSum"),
            "sum2": saved_agg("sum2", "doubleSum"),
            "div1": DruidMetric(
                metric_name="div1",
                metric_type="postagg",
                json=json.dumps(division),
            ),
        }
        ds.columns = [dim1, dim2]
        ds.metrics = list(metrics_dict.values())

        metrics = ["count1"]
        granularity = "all"

        def run_top5(groupby, order_by):
            # Top 5 rows for ``groupby``, ordered (descending) by ``order_by``.
            ds.run_query(
                groupby,
                metrics,
                granularity,
                from_dttm,
                to_dttm,
                timeseries_limit=5,
                timeseries_limit_metric=order_by,
                client=client,
                order_desc=True,
                filter=[],
            )

        def check_aggs(qry_obj, expected_aggs, expected_post_aggs):
            # Compare the names of the aggregations and post-aggregations.
            aggregations = qry_obj["aggregations"]
            post_aggregations = qry_obj["post_aggregations"]
            self.assertEqual(expected_aggs, set(aggregations.keys()))
            self.assertEqual(expected_post_aggs,
                             set(post_aggregations.keys()))

        groupby = ["dim1"]

        # counts of the top 5 'dim1's, ordered by 'sum1'
        run_top5(groupby, "sum1")
        _, qry_obj = client.topn.call_args_list[0]
        self.assertEqual("dim1", qry_obj["dimension"])
        self.assertEqual("sum1", qry_obj["metric"])
        check_aggs(qry_obj, {"count1", "sum1"}, set())

        # counts of the top 5 'dim1's, ordered by 'div1'
        run_top5(groupby, "div1")
        _, qry_obj = client.topn.call_args_list[1]
        self.assertEqual("dim1", qry_obj["dimension"])
        self.assertEqual("div1", qry_obj["metric"])
        check_aggs(qry_obj, {"count1", "sum1", "sum2"}, {"div1"})

        groupby = ["dim1", "dim2"]

        # counts of the top 5 ['dim1', 'dim2']s, ordered by 'sum1'
        run_top5(groupby, "sum1")
        _, qry_obj = client.groupby.call_args_list[0]
        self.assertEqual({"dim1", "dim2"}, set(qry_obj["dimensions"]))
        self.assertEqual("sum1",
                         qry_obj["limit_spec"]["columns"][0]["dimension"])
        check_aggs(qry_obj, {"count1", "sum1"}, set())

        # counts of the top 5 ['dim1', 'dim2']s, ordered by 'div1'
        run_top5(groupby, "div1")
        _, qry_obj = client.groupby.call_args_list[1]
        self.assertEqual({"dim1", "dim2"}, set(qry_obj["dimensions"]))
        self.assertEqual("div1",
                         qry_obj["limit_spec"]["columns"][0]["dimension"])
        check_aggs(qry_obj, {"count1", "sum1", "sum2"}, {"div1"})
    def test_metrics_and_post_aggs(self):
        """
        Verify how requested metrics split into aggregations and post-aggs.

        Saved aggregation metrics and ad-hoc metrics must come back as
        aggregations with no post-aggregations. A requested post-aggregation
        must come back as a post-aggregation, accompanied by the raw
        aggregation of the metric it reads from.
        """
        metrics_dict = {
            "unused_count": DruidMetric(
                metric_name="unused_count",
                verbose_name="COUNT(*)",
                metric_type="count",
                json=json.dumps({"type": "count", "name": "unused_count"}),
            ),
            "some_sum": DruidMetric(
                metric_name="some_sum",
                verbose_name="SUM(*)",
                metric_type="sum",
                json=json.dumps({"type": "sum", "name": "sum"}),
            ),
            "a_histogram": DruidMetric(
                metric_name="a_histogram",
                verbose_name="APPROXIMATE_HISTOGRAM(*)",
                metric_type="approxHistogramFold",
                json=json.dumps(
                    {"type": "approxHistogramFold", "name": "a_histogram"}),
            ),
            "aCustomMetric": DruidMetric(
                metric_name="aCustomMetric",
                verbose_name="MY_AWESOME_METRIC(*)",
                metric_type="aCustomType",
                json=json.dumps(
                    {"type": "customMetric", "name": "aCustomMetric"}),
            ),
            "quantile_p95": DruidMetric(
                metric_name="quantile_p95",
                verbose_name="P95(*)",
                metric_type="postagg",
                json=json.dumps({
                    "type": "quantile",
                    "probability": 0.95,
                    "name": "p95",
                    "fieldName": "a_histogram",
                }),
            ),
            "aCustomPostAgg": DruidMetric(
                metric_name="aCustomPostAgg",
                verbose_name="CUSTOM_POST_AGG(*)",
                metric_type="postagg",
                json=json.dumps({
                    "type": "customPostAgg",
                    "name": "aCustomPostAgg",
                    "field": {
                        "type": "fieldAccess",
                        "fieldName": "aCustomMetric",
                    },
                }),
            ),
        }

        adhoc_metric = {
            "expressionType": "SIMPLE",
            "column": {"type": "DOUBLE", "column_name": "value"},
            "aggregate": "SUM",
            "label": "My Adhoc Metric",
        }

        def resolve(metrics):
            # Returns the (aggregations, post-aggregations) pair.
            return DruidDatasource.metrics_and_post_aggs(metrics, metrics_dict)

        # A saved aggregation on its own.
        saved_metrics, post_aggs = resolve(["some_sum"])
        assert set(saved_metrics.keys()) == {"some_sum"}
        assert post_aggs == {}

        # An ad-hoc metric is keyed by its label.
        saved_metrics, post_aggs = resolve([adhoc_metric])
        assert set(saved_metrics.keys()) == {adhoc_metric["label"]}
        assert post_aggs == {}

        # Saved and ad-hoc metrics together.
        saved_metrics, post_aggs = resolve(["some_sum", adhoc_metric])
        assert set(saved_metrics.keys()) == {"some_sum", adhoc_metric["label"]}
        assert post_aggs == {}

        # A quantile post-aggregation pulls in the histogram it reads.
        saved_metrics, post_aggs = resolve(["quantile_p95"])
        assert set(saved_metrics.keys()) == {"a_histogram"}
        assert set(post_aggs.keys()) == {"quantile_p95"}

        # A custom post-aggregation pulls in the custom metric it reads.
        saved_metrics, post_aggs = resolve(["aCustomPostAgg"])
        assert set(saved_metrics.keys()) == {"aCustomMetric"}
        assert set(post_aggs.keys()) == {"aCustomPostAgg"}
 def test_run_query_single_groupby(self):
     """Exercise ``run_query`` with a single group-by column.

     Three scenarios run against a mocked client:

     * descending order with a series limit: two ``topn`` calls, using the
       ``dimension`` keyword rather than ``dimensions``;
     * ascending order: one ``groupby`` call with ``dimensions``;
     * descending order on a column that has a dimension spec: two ``topn``
       calls, the first with the spec's ``dimension`` value and the second
       with the full spec.
     """
     # Time bounds: mocks whose ``replace`` hands back the same object.
     from_dttm = Mock()
     from_dttm.replace = Mock(return_value=from_dttm)
     from_dttm.isoformat = Mock(return_value="from")
     from_dttm.tzname = Mock(return_value="timezone")
     to_dttm = Mock()
     to_dttm.replace = Mock(return_value=to_dttm)
     to_dttm.isoformat = Mock(return_value="to")

     ds = DruidDatasource(datasource_name="datasource")
     ds.metrics = [
         DruidMetric(metric_name=name) for name in ("metric1", "metric2")
     ]
     ds.columns = [
         DruidColumn(column_name=name) for name in ("col1", "col2")
     ]
     # Metric resolution and having-filters are stubbed out.
     ds._metrics_and_post_aggs = Mock(
         return_value=(["metric1"], ["some_agg"]))
     ds.get_having_filters = Mock(return_value=[])
     groupby = ["col1"]
     metrics = ["metric1"]

     # Scenario 1: order_desc with a limit -> client.topn is called twice.
     client = Mock()
     client.query_builder.last_query.query_dict = {"mock": 0}
     ds.run_query(
         groupby,
         metrics,
         None,
         from_dttm,
         to_dttm,
         timeseries_limit=100,
         client=client,
         order_desc=True,
         filter=[],
     )
     self.assertEqual(2, client.topn.call_count)
     self.assertEqual(0, client.groupby.call_count)
     self.assertEqual(0, client.timeseries.call_count)
     # topn takes a single "dimension", never a "dimensions" list.
     _, pre_query_kwargs = client.topn.call_args_list[0]
     self.assertNotIn("dimensions", pre_query_kwargs)
     self.assertIn("dimension", pre_query_kwargs)
     _, final_query_kwargs = client.topn.call_args_list[1]
     self.assertIn("dimension", final_query_kwargs)
     self.assertEqual("col1", final_query_kwargs["dimension"])

     # Scenario 2: not order_desc -> a single client.groupby call.
     client = Mock()
     client.query_builder.last_query.query_dict = {"mock": 0}
     ds.run_query(
         groupby,
         metrics,
         None,
         from_dttm,
         to_dttm,
         client=client,
         order_desc=False,
         filter=[],
         row_limit=100,
     )
     self.assertEqual(0, client.topn.call_count)
     self.assertEqual(1, client.groupby.call_count)
     self.assertEqual(0, client.timeseries.call_count)
     _, groupby_kwargs = client.groupby.call_args_list[0]
     self.assertIn("dimensions", groupby_kwargs)
     self.assertEqual(["col1"], groupby_kwargs["dimensions"])

     # Scenario 3: order_desc with a limit on a column carrying a dimension
     # spec -> topn is called with the single-dimension spec.
     spec = {"outputName": "hello", "dimension": "matcho"}
     ds.columns.append(
         DruidColumn(column_name="col3",
                     dimension_spec_json=json.dumps(spec)))
     groupby = ["col3"]
     client = Mock()
     client.query_builder.last_query.query_dict = {"mock": 0}
     ds.run_query(
         groupby,
         metrics,
         None,
         from_dttm,
         to_dttm,
         client=client,
         order_desc=True,
         timeseries_limit=5,
         filter=[],
         row_limit=100,
     )
     self.assertEqual(2, client.topn.call_count)
     self.assertEqual(0, client.groupby.call_count)
     self.assertEqual(0, client.timeseries.call_count)
     _, pre_query_kwargs = client.topn.call_args_list[0]
     _, final_query_kwargs = client.topn.call_args_list[1]
     self.assertIn("dimension", pre_query_kwargs)
     self.assertIn("dimension", final_query_kwargs)
     # The pre-query uses the plain dimension name, the final query the
     # full spec.
     self.assertEqual("matcho", pre_query_kwargs["dimension"])
     self.assertEqual(spec, final_query_kwargs["dimension"])
Пример #16
0
    def test_run_query_order_by_metrics(self):
        """
        Ordering by a metric that was not itself requested must still send
        the aggregations needed to compute it: ordering by 'sum1' adds
        'sum1', while ordering by the 'div1' post-aggregation adds 'sum1',
        'sum2' and 'div1' itself. Checked for one dimension (client.topn)
        and for two dimensions (client.groupby).
        """

        def plain_metric(name, metric_type):
            # aggregation whose JSON carries only its type and its name
            return DruidMetric(
                metric_name=name,
                metric_type=metric_type,
                json=json.dumps({'type': metric_type, 'name': name}),
            )

        client = Mock()
        client.query_builder.last_query.query_dict = {'mock': 0}
        from_dttm = Mock()
        to_dttm = Mock()
        ds = DruidDatasource(datasource_name='datasource')
        ds.get_having_filters = Mock(return_value=[])
        dimension_columns = [
            DruidColumn(column_name='dim1'),
            DruidColumn(column_name='dim2'),
        ]

        # post-aggregation dividing 'sum1' by 'sum2'
        ratio_json = json.dumps({
            'fn': '/',
            'type': 'arithmetic',
            'name': 'div1',
            'fields': [
                {'fieldName': source, 'type': 'fieldAccess'}
                for source in ('sum1', 'sum2')
            ],
        })
        metrics_dict = {
            'count1': plain_metric('count1', 'count'),
            'sum1': plain_metric('sum1', 'doubleSum'),
            'sum2': plain_metric('sum2', 'doubleSum'),
            'div1': DruidMetric(
                metric_name='div1',
                metric_type='postagg',
                json=ratio_json,
            ),
        }
        ds.columns = dimension_columns
        ds.metrics = list(metrics_dict.values())

        # only 'count1' is requested; the ordering metric varies per case
        requested = ['count1']

        def top5_ordered_by(dimensions, limit_metric):
            # get the counts of the top 5 `dimensions`, order by `limit_metric`
            ds.run_query(
                dimensions,
                requested,
                'all',
                from_dttm,
                to_dttm,
                timeseries_limit=5,
                timeseries_limit_metric=limit_metric,
                client=client,
                order_desc=True,
                filter=[],
            )

        def check_aggregations(sent, want_aggs, want_post_aggs):
            sent_aggs = sent['aggregations']
            sent_post_aggs = sent['post_aggregations']
            self.assertEqual(want_aggs, set(sent_aggs.keys()))
            self.assertEqual(want_post_aggs, set(sent_post_aggs.keys()))

        # (ordering metric, expected aggregations, expected post-aggregations)
        cases = (
            ('sum1', {'count1', 'sum1'}, set()),
            ('div1', {'count1', 'sum1', 'sum2'}, {'div1'}),
        )

        one_dimension = ['dim1']
        for position, (order_by, want_aggs, want_post_aggs) in enumerate(cases):
            top5_ordered_by(one_dimension, order_by)
            sent = client.topn.call_args_list[position][1]
            self.assertEqual('dim1', sent['dimension'])
            self.assertEqual(order_by, sent['metric'])
            check_aggregations(sent, want_aggs, want_post_aggs)

        two_dimensions = ['dim1', 'dim2']
        for position, (order_by, want_aggs, want_post_aggs) in enumerate(cases):
            top5_ordered_by(two_dimensions, order_by)
            sent = client.groupby.call_args_list[position][1]
            self.assertEqual({'dim1', 'dim2'}, set(sent['dimensions']))
            self.assertEqual(order_by,
                             sent['limit_spec']['columns'][0]['dimension'])
            check_aggregations(sent, want_aggs, want_post_aggs)
Пример #17
0
    def test_metrics_and_post_aggs(self):
        """
        Test generation of metrics and post-aggregations from an initial list
        of superset metrics (which may include the results of either). This
        primarily tests that specifying a post-aggregator metric will also
        require the raw aggregation of the associated druid metric column.

        Covered in turn: a saved metric, an ad-hoc metric dict, a mix of the
        two, and two post-aggregations that each depend on one raw metric.
        """
        metrics_dict = {
            'unused_count':
            DruidMetric(
                metric_name='unused_count',
                verbose_name='COUNT(*)',
                metric_type='count',
                json=json.dumps({
                    'type': 'count',
                    'name': 'unused_count'
                }),
            ),
            # NOTE(review): the JSON 'name' ('sum') differs from metric_name;
            # the assertions below expect results keyed by 'some_sum'.
            'some_sum':
            DruidMetric(
                metric_name='some_sum',
                verbose_name='SUM(*)',
                metric_type='sum',
                json=json.dumps({
                    'type': 'sum',
                    'name': 'sum'
                }),
            ),
            'a_histogram':
            DruidMetric(
                metric_name='a_histogram',
                verbose_name='APPROXIMATE_HISTOGRAM(*)',
                metric_type='approxHistogramFold',
                json=json.dumps(
                    {
                        'type': 'approxHistogramFold',
                        'name': 'a_histogram'
                    }, ),
            ),
            'aCustomMetric':
            DruidMetric(
                metric_name='aCustomMetric',
                verbose_name='MY_AWESOME_METRIC(*)',
                metric_type='aCustomType',
                json=json.dumps(
                    {
                        'type': 'customMetric',
                        'name': 'aCustomMetric'
                    }, ),
            ),
            # post-aggregation referencing 'a_histogram' through 'fieldName'
            'quantile_p95':
            DruidMetric(
                metric_name='quantile_p95',
                verbose_name='P95(*)',
                metric_type='postagg',
                json=json.dumps({
                    'type': 'quantile',
                    'probability': 0.95,
                    'name': 'p95',
                    'fieldName': 'a_histogram',
                }),
            ),
            # post-aggregation referencing 'aCustomMetric' through a nested
            # fieldAccess entry
            'aCustomPostAgg':
            DruidMetric(
                metric_name='aCustomPostAgg',
                verbose_name='CUSTOM_POST_AGG(*)',
                metric_type='postagg',
                json=json.dumps({
                    'type': 'customPostAgg',
                    'name': 'aCustomPostAgg',
                    'field': {
                        'type': 'fieldAccess',
                        'fieldName': 'aCustomMetric',
                    },
                }),
            ),
        }

        adhoc_metric = {
            'expressionType': 'SIMPLE',
            'column': {
                'type': 'DOUBLE',
                'column_name': 'value'
            },
            'aggregate': 'SUM',
            'label': 'My Adhoc Metric',
        }

        # self.assertEqual is used instead of bare ``assert`` so the checks
        # survive ``python -O`` and match the other tests in this file.

        # a saved metric resolves to itself, with no post-aggregations
        metrics = ['some_sum']
        saved_metrics, post_aggs = DruidDatasource.metrics_and_post_aggs(
            metrics, metrics_dict)

        self.assertEqual({'some_sum'}, set(saved_metrics.keys()))
        self.assertEqual({}, post_aggs)

        # an ad-hoc metric is expected to be keyed by its label
        metrics = [adhoc_metric]
        saved_metrics, post_aggs = DruidDatasource.metrics_and_post_aggs(
            metrics, metrics_dict)

        self.assertEqual({adhoc_metric['label']}, set(saved_metrics.keys()))
        self.assertEqual({}, post_aggs)

        # saved and ad-hoc metrics can be mixed in one request
        metrics = ['some_sum', adhoc_metric]
        saved_metrics, post_aggs = DruidDatasource.metrics_and_post_aggs(
            metrics, metrics_dict)

        self.assertEqual({'some_sum', adhoc_metric['label']},
                         set(saved_metrics.keys()))
        self.assertEqual({}, post_aggs)

        # a post-aggregation pulls in the raw metric it reads from
        metrics = ['quantile_p95']
        saved_metrics, post_aggs = DruidDatasource.metrics_and_post_aggs(
            metrics, metrics_dict)

        self.assertEqual({'a_histogram'}, set(saved_metrics.keys()))
        self.assertEqual({'quantile_p95'}, set(post_aggs.keys()))

        metrics = ['aCustomPostAgg']
        saved_metrics, post_aggs = DruidDatasource.metrics_and_post_aggs(
            metrics, metrics_dict)

        self.assertEqual({'aCustomMetric'}, set(saved_metrics.keys()))
        self.assertEqual({'aCustomPostAgg'}, set(post_aggs.keys()))
Пример #18
0
 def test_run_query_single_groupby(self):
     """
     Check which client method run_query uses for a single groupby column:
     two topn calls when ordering descending with a timeseries limit, one
     groupby call when order_desc is False, and two topn calls again for a
     column that carries a dimension spec.
     """
     from_dttm = Mock()
     to_dttm = Mock()
     from_dttm.replace = Mock(return_value=from_dttm)
     to_dttm.replace = Mock(return_value=to_dttm)
     from_dttm.isoformat = Mock(return_value='from')
     to_dttm.isoformat = Mock(return_value='to')
     from_dttm.tzname = Mock(return_value='timezone')

     ds = DruidDatasource(datasource_name='datasource')
     ds.metrics = [
         DruidMetric(metric_name='metric1'),
         DruidMetric(metric_name='metric2'),
     ]
     ds.columns = [
         DruidColumn(column_name='col1'),
         DruidColumn(column_name='col2'),
     ]
     ds._metrics_and_post_aggs = Mock(
         return_value=(['metric1'], ['some_agg']))
     ds.get_having_filters = Mock(return_value=[])
     metrics = ['metric1']

     def new_client():
         # mocked client exposing the last-query dict
         mocked = Mock()
         mocked.query_builder.last_query.query_dict = {'mock': 0}
         return mocked

     def query(mocked, dimensions, **options):
         ds.run_query(
             dimensions,
             metrics,
             None,
             from_dttm,
             to_dttm,
             client=mocked,
             filter=[],
             **options
         )

     def assert_call_counts(mocked, topn, groupby, timeseries):
         self.assertEqual(topn, len(mocked.topn.call_args_list))
         self.assertEqual(groupby, len(mocked.groupby.call_args_list))
         self.assertEqual(timeseries, len(mocked.timeseries.call_args_list))

     # order_desc with a timeseries limit: client.topn is called twice
     plain_groupby = ['col1']
     client = new_client()
     query(client, plain_groupby, timeseries_limit=100, order_desc=True)
     assert_call_counts(client, 2, 0, 0)
     # check that there is no dimensions entry
     pre_query_args = client.topn.call_args_list[0][1]
     self.assertNotIn('dimensions', pre_query_args)
     self.assertIn('dimension', pre_query_args)
     final_query_args = client.topn.call_args_list[1][1]
     self.assertIn('dimension', final_query_args)
     self.assertEqual('col1', final_query_args['dimension'])

     # not order_desc
     client = new_client()
     query(client, plain_groupby, order_desc=False, row_limit=100)
     assert_call_counts(client, 0, 1, 0)
     groupby_args = client.groupby.call_args_list[0][1]
     self.assertIn('dimensions', groupby_args)
     self.assertEqual(['col1'],
                      client.groupby.call_args_list[0][1]['dimensions'])

     # order_desc but timeseries and dimension spec
     # calls topn with single dimension spec 'dimension'
     spec = {'outputName': 'hello', 'dimension': 'matcho'}
     ds.columns.append(
         DruidColumn(column_name='col3',
                     dimension_spec_json=json.dumps(spec)))
     client = new_client()
     query(
         client,
         ['col3'],
         order_desc=True,
         timeseries_limit=5,
         row_limit=100,
     )
     assert_call_counts(client, 2, 0, 0)
     first_call, second_call = client.topn.call_args_list
     self.assertIn('dimension', first_call[1])
     self.assertIn('dimension', second_call[1])
     # uses dimension for pre query and full spec for final query
     self.assertEqual('matcho', first_call[1]['dimension'])
     self.assertEqual(spec, second_call[1]['dimension'])