Пример #1
0
 def test_nested_filtered_aggregator(self):
     """A filtered aggregator nested in another keeps its name on the innermost level."""
     outer_filter = filters.Filter(dimension="dim1", value="val")
     inner_filter = filters.Filter(dimension="dim2", value="val")
     inner_agg = aggregators.filtered(inner_filter,
                                      aggregators.count("metric1"))
     nested_agg = aggregators.filtered(outer_filter, inner_agg)
     actual = aggregators.build_aggregators({"agg_name": nested_agg})

     # Only the innermost (count) aggregation is expected to carry 'agg_name'.
     named_count = {
         "fieldName": "metric1",
         "type": "count",
         "name": "agg_name",
     }
     inner_expected = {
         "type": "filtered",
         "aggregator": named_count,
         "filter": {"dimension": "dim2", "value": "val", "type": "selector"},
     }
     outer_expected = {
         "type": "filtered",
         "aggregator": inner_expected,
         "filter": {"dimension": "dim1", "value": "val", "type": "selector"},
     }
     expected = [outer_expected]
     assert expected == actual
Пример #2
0
 def test_nested_filtered_aggregator(self):
     """Check the spec built for two stacked filtered aggregators around a count."""

     def selector(dimension):
         # Expected selector-filter spec for `dimension` matching 'val'.
         return {'dimension': dimension, 'value': 'val', 'type': 'selector'}

     filter1 = filters.Filter(dimension='dim1', value='val')
     filter2 = filters.Filter(dimension='dim2', value='val')
     innermost = aggregators.count('metric1')
     agg = aggregators.filtered(filter1,
                                aggregators.filtered(filter2, innermost))
     actual = aggregators.build_aggregators({'agg_name': agg})

     # the innermost aggregation must have 'agg_name'
     expected_count = {
         'fieldName': 'metric1',
         'type': 'count',
         'name': 'agg_name'
     }
     expected_inner = {
         'type': 'filtered',
         'aggregator': expected_count,
         'filter': selector('dim2')
     }
     expected = [{
         'type': 'filtered',
         'aggregator': expected_inner,
         'filter': selector('dim1')
     }]
     assert expected == actual
Пример #3
0
    def test_build_filtered_aggregator(self):
        """build_aggregators() must name the aggregator wrapped by each filtered spec.

        Every input entry is a filtered aggregator; the expected output keeps
        the "filtered" wrapper and its selector filter, and puts the input key
        as "name" on the wrapped aggregator.
        """
        filter_ = filters.Filter(dimension="dim", value="val")
        agg_input = {
            "agg1": aggregators.filtered(filter_, aggregators.count("metric1")),
            "agg2": aggregators.filtered(filter_, aggregators.longsum("metric2")),
            "agg3": aggregators.filtered(filter_, aggregators.doublesum("metric3")),
            "agg4": aggregators.filtered(filter_, aggregators.min("metric4")),
            "agg5": aggregators.filtered(filter_, aggregators.max("metric5")),
            "agg6": aggregators.filtered(filter_, aggregators.hyperunique("metric6")),
            "agg7": aggregators.filtered(filter_, aggregators.cardinality("dim1")),
            "agg8": aggregators.filtered(filter_, aggregators.cardinality(["dim1", "dim2"], by_row=True)),
        }
        base = {"type": "filtered", "filter": {"type": "selector", "dimension": "dim", "value": "val"}}

        aggs = [
            {"name": "agg1", "type": "count", "fieldName": "metric1"},
            {"name": "agg2", "type": "longSum", "fieldName": "metric2"},
            {"name": "agg3", "type": "doubleSum", "fieldName": "metric3"},
            {"name": "agg4", "type": "min", "fieldName": "metric4"},
            {"name": "agg5", "type": "max", "fieldName": "metric5"},
            {"name": "agg6", "type": "hyperUnique", "fieldName": "metric6"},
            {"name": "agg7", "type": "cardinality", "fieldNames": ["dim1"], "byRow": False},
            {"name": "agg8", "type": "cardinality", "fieldNames": ["dim1", "dim2"], "byRow": True},
        ]
        expected = []
        for agg in aggs:
            exp = deepcopy(base)
            exp["aggregator"] = agg
            expected.append(exp)

        get_inner = itemgetter("aggregator")
        get_name = itemgetter("name")

        def inner_name(spec):
            # Sort key: the name carried by the wrapped aggregator.
            return get_name(get_inner(spec))

        # Sort both sides by name so the comparison is order-independent, and
        # compare the hand-written expectation with what was really built
        # (previously `expected` was overwritten with the built output, which
        # made the assertion compare the output with itself).
        actual = sorted(aggregators.build_aggregators(agg_input), key=inner_name)
        expected = sorted(expected, key=inner_name)
        assert expected == actual
Пример #4
0
 def test_filtered_aggregator(self):
     """filtered() must pair the selector filter spec with the given aggregator."""
     filter_ = filters.Filter(dimension="dim", value="val")
     wrapped = [
         aggregators.count("metric1"),
         aggregators.longsum("metric2"),
         aggregators.doublesum("metric3"),
         aggregators.doublemin("metric4"),
         aggregators.doublemax("metric5"),
         aggregators.hyperunique("metric6"),
         aggregators.cardinality("dim1"),
         aggregators.cardinality(["dim1", "dim2"], by_row=True),
         aggregators.thetasketch("dim1"),
         aggregators.thetasketch("metric7"),
         aggregators.thetasketch(
             "metric8", isinputthetasketch=True, size=8192),
     ]
     for inner in wrapped:
         # The wrapped aggregator is expected to appear as passed in.
         expected = dict(
             type="filtered",
             filter=dict(type="selector", dimension="dim", value="val"),
             aggregator=inner,
         )
         actual = aggregators.filtered(filter_, inner)
         assert actual == expected
Пример #5
0
 def test_nested_filtered_aggregator(self):
     """Build a doubly-filtered count and check where the name ends up."""
     filter1 = filters.Filter(dimension='dim1', value='val')
     filter2 = filters.Filter(dimension='dim2', value='val')
     count_agg = aggregators.count('metric1')
     inner_filtered = aggregators.filtered(filter2, count_agg)
     outer_filtered = aggregators.filtered(filter1, inner_filtered)
     actual = aggregators.build_aggregators({'agg_name': outer_filtered})

     # the innermost aggregation must have 'agg_name'
     expected_leaf = {
         'fieldName': 'metric1',
         'type': 'count',
         'name': 'agg_name',
     }
     expected_middle = {
         'type': 'filtered',
         'aggregator': expected_leaf,
         'filter': {
             'dimension': 'dim2',
             'value': 'val',
             'type': 'selector',
         },
     }
     expected_top = {
         'type': 'filtered',
         'aggregator': expected_middle,
         'filter': {
             'dimension': 'dim1',
             'value': 'val',
             'type': 'selector',
         },
     }
     expected = [expected_top]
     assert expected == actual
Пример #6
0
 def test_filtered_aggregator(self):
     """Each aggregator passed to filtered() comes back wrapped with the filter spec."""
     filter_ = filters.Filter(dimension='dim', value='val')
     # Selector spec the 'dim' == 'val' filter is expected to produce.
     selector_spec = {
         'type': 'selector',
         'dimension': 'dim',
         'value': 'val'
     }
     candidates = [
         aggregators.count('metric1'),
         aggregators.longsum('metric2'),
         aggregators.doublesum('metric3'),
         aggregators.doublemin('metric4'),
         aggregators.doublemax('metric5'),
         aggregators.hyperunique('metric6'),
         aggregators.cardinality('dim1'),
         aggregators.cardinality(['dim1', 'dim2'], by_row=True),
         aggregators.thetasketch('dim1'),
         aggregators.thetasketch('metric7'),
         aggregators.thetasketch('metric8',
                                 isinputthetasketch=True,
                                 size=8192),
     ]
     for candidate in candidates:
         expected = {
             'type': 'filtered',
             'filter': selector_spec,
             'aggregator': candidate
         }
         actual = aggregators.filtered(filter_, candidate)
         assert actual == expected
Пример #7
0
 def test_nested_filtered_aggregator(self):
     """Nested filtered aggregators: verify the full spec from build_aggregators()."""
     dim1_filter = filters.Filter(dimension="dim1", value="val")
     dim2_filter = filters.Filter(dimension="dim2", value="val")
     by_dim2 = aggregators.filtered(dim2_filter, aggregators.count("metric1"))
     by_dim1_then_dim2 = aggregators.filtered(dim1_filter, by_dim2)
     actual = aggregators.build_aggregators({"agg_name": by_dim1_then_dim2})

     # Expected spec, assembled from the inside out; the innermost
     # aggregation must have 'agg_name'.
     expected_spec = {
         "fieldName": "metric1",
         "type": "count",
         "name": "agg_name",
     }
     for dimension in ("dim2", "dim1"):
         expected_spec = {
             "type": "filtered",
             "aggregator": expected_spec,
             "filter": {
                 "dimension": dimension,
                 "value": "val",
                 "type": "selector",
             },
         }
     expected = [expected_spec]
     assert expected == actual
Пример #8
0
    def test_build_filtered_aggregator(self):
        """build_aggregators() must name the aggregator wrapped by each filtered spec.

        Every input entry is a filtered aggregator; the expected output keeps
        the 'filtered' wrapper and its selector filter, and puts the input key
        as 'name' on the wrapped aggregator.
        """
        filter_ = filters.Filter(dimension='dim', value='val')
        agg_input = {
            'agg1': aggregators.filtered(filter_,
                                         aggregators.count('metric1')),
            'agg2': aggregators.filtered(filter_,
                                         aggregators.longsum('metric2')),
            'agg3': aggregators.filtered(filter_,
                                         aggregators.doublesum('metric3')),
            'agg4': aggregators.filtered(filter_,
                                         aggregators.min('metric4')),
            'agg5': aggregators.filtered(filter_,
                                         aggregators.max('metric5')),
            'agg6': aggregators.filtered(filter_,
                                         aggregators.hyperunique('metric6')),
            'agg7': aggregators.filtered(filter_,
                                         aggregators.cardinality('dim1')),
            'agg8': aggregators.filtered(filter_,
                                         aggregators.cardinality(['dim1', 'dim2'], by_row=True)),
        }
        base = {
            'type': 'filtered',
            'filter': {
                'type': 'selector',
                'dimension': 'dim',
                'value': 'val'
            }
        }

        aggs = [
            {'name': 'agg1', 'type': 'count', 'fieldName': 'metric1'},
            {'name': 'agg2', 'type': 'longSum', 'fieldName': 'metric2'},
            {'name': 'agg3', 'type': 'doubleSum', 'fieldName': 'metric3'},
            {'name': 'agg4', 'type': 'min', 'fieldName': 'metric4'},
            {'name': 'agg5', 'type': 'max', 'fieldName': 'metric5'},
            {'name': 'agg6', 'type': 'hyperUnique', 'fieldName': 'metric6'},
            {'name': 'agg7', 'type': 'cardinality', 'fieldNames': ['dim1'], 'byRow': False},
            {'name': 'agg8', 'type': 'cardinality', 'fieldNames': ['dim1', 'dim2'], 'byRow': True},
        ]
        expected = []
        for agg in aggs:
            exp = deepcopy(base)
            exp['aggregator'] = agg
            expected.append(exp)

        get_inner = itemgetter('aggregator')
        get_name = itemgetter('name')

        def inner_name(spec):
            # Sort key: the name carried by the wrapped aggregator.
            return get_name(get_inner(spec))

        # Sort both sides by name so the comparison is order-independent, and
        # compare the hand-written expectation with what was really built
        # (previously `expected` was overwritten with the built output, which
        # made the assertion compare the output with itself).
        actual = sorted(aggregators.build_aggregators(agg_input),
                        key=inner_name)
        expected = sorted(expected, key=inner_name)
        assert expected == actual
Пример #9
0
 def test_filtered_aggregator(self):
     """filtered() output is compared against a hand-built spec for several aggregators."""

     def expected_for(inner):
         # Spec expected when `inner` is wrapped by the dim == val filter.
         return {
             "type": "filtered",
             "filter": {
                 "type": "selector",
                 "dimension": "dim",
                 "value": "val",
             },
             "aggregator": inner,
         }

     filter_ = filters.Filter(dimension="dim", value="val")
     inner_aggs = [
         aggregators.count("metric1"),
         aggregators.longsum("metric2"),
         aggregators.doublesum("metric3"),
         aggregators.min("metric4"),
         aggregators.max("metric5"),
         aggregators.hyperunique("metric6"),
         aggregators.cardinality("dim1"),
         aggregators.cardinality(["dim1", "dim2"], by_row=True),
     ]
     for inner in inner_aggs:
         expected = expected_for(inner)
         actual = aggregators.filtered(filter_, inner)
         assert actual == expected
Пример #10
0
 def test_filtered_aggregator(self):
     """Wrap six single-metric aggregators with filtered() and check each resulting spec."""
     filter_ = filters.Filter(dimension='dim', value='val')
     count_agg = aggregators.count('metric1')
     longsum_agg = aggregators.longsum('metric2')
     doublesum_agg = aggregators.doublesum('metric3')
     min_agg = aggregators.min('metric4')
     max_agg = aggregators.max('metric5')
     hyperunique_agg = aggregators.hyperunique('metric6')

     for inner in (count_agg, longsum_agg, doublesum_agg,
                   min_agg, max_agg, hyperunique_agg):
         # The filter is expected as a selector spec; the aggregator as given.
         expected = dict(
             type='filtered',
             filter=dict(type='selector', dimension='dim', value='val'),
             aggregator=inner,
         )
         actual = aggregators.filtered(filter_, inner)
         assert actual == expected
Пример #11
0
def get_druid_data(dimensions=None, filter_list=None, filter_type="and",
                   order_by=None,
                   datasource=settings.DRUID_SPRAYDAY_DATASOURCE):
    """
    Runs a groupby query against Druid, returns data with metrics
    Inputs:
        dimensions => list of dimensions to group by; when None, groups by
                      target_area_id, target_area_name and
                      target_area_structures
        filter_list => list of list of things to filter with e.g.
                        filter_list=[['target_area_id', operator.ne, 1],
                                     ['sprayable', operator.eq, "true"],
                                     ['dimension', operator, "value"]])
                       None or an empty list means no query-level filter
        filter_type => type of Druid filter used to combine the entries of
                       filter_list
        order_by => field(s) to order the data by; when None, orders by
                    ["target_area_name"]
        datasource => Druid datasource to query
    Returns:
        the `result` of the groupby query, or an empty list when issuing the
        query raises OSError
    """
    # Defaults are created per call instead of as shared mutable default
    # arguments, since `order_by` is handed on to the query parameters.
    if order_by is None:
        order_by = ["target_area_name"]

    dim = filters.Dimension
    was_sprayed = settings.MSPRAY_WAS_SPRAYED_VALUE
    was_not_sprayed = settings.MSPRAY_WAS_NOT_SPRAYED_VALUE

    def count_if(condition):
        # Sum of 'count' over rows matching `condition`.  A new longsum
        # aggregator is created on every call so that no aggregator object
        # is shared between entries of the aggregations dict.
        return aggregators.filtered(condition, aggregators.longsum('count'))

    def count_if_all(*conditions):
        # Sum of 'count' over rows matching every one of `conditions`.
        return count_if(filters.Filter(type='and', fields=list(conditions)))

    query = PyDruid(get_druid_broker_url(), 'druid/v2')
    params = dict(
        datasource=datasource,
        granularity='all',
        intervals=settings.DRUID_INTERVAL,
        aggregations={
            'num_not_sprayable': count_if_all(
                dim('sprayable') == 'false',
            ),
            'num_not_sprayed': count_if_all(
                dim('sprayable') == 'true',
                dim('sprayed') == was_not_sprayed,
            ),
            'num_sprayed': count_if(dim('sprayed') == was_sprayed),
            'num_new': count_if(dim('is_new') == 'true'),
            'num_new_no_duplicates': count_if_all(
                dim('is_duplicate') == 'false',
                dim('is_new') == 'true',
            ),
            'num_duplicate': count_if(dim('is_duplicate') == 'true'),
            'num_sprayed_no_duplicates': count_if_all(
                dim('is_duplicate') == 'false',
                dim('sprayed') == was_sprayed,
            ),
            'num_not_sprayed_no_duplicates': count_if_all(
                dim('is_duplicate') == 'false',
                dim('sprayable') == 'true',
                dim('sprayed') == was_not_sprayed,
            ),
            'num_sprayed_duplicates': count_if_all(
                dim('is_duplicate') == 'true',
                dim('sprayable') == 'true',
                dim('sprayed') == was_sprayed,
            ),
            'num_not_sprayable_no_duplicates': count_if_all(
                dim('is_duplicate') == 'false',
                dim('sprayable') == 'false',
            ),
            'num_refused': count_if_all(
                dim('is_duplicate') == 'false',
                dim('is_refused') == 'true',
                dim('sprayed') == was_not_sprayed,
            ),
        },
        post_aggregations={
            # Found = sprayed (unique + duplicate) + not sprayed (unique).
            'num_found': Field('num_sprayed_no_duplicates') +
            Field('num_sprayed_duplicates') +
            Field('num_not_sprayed_no_duplicates')
        },
        limit_spec={
            "type": "default",
            "limit": 50000,
            "columns": order_by
        }
    )

    if filter_list:
        # Each entry is [dimension name, comparison operator, value]; the
        # operator (e.g. operator.eq) is applied to the Dimension and value.
        fields = [
            this_filter[1](filters.Dimension(this_filter[0]), this_filter[2])
            for this_filter in filter_list
        ]
        params['filter'] = filters.Filter(type=filter_type, fields=fields)

    if dimensions is None:
        params['dimensions'] = ['target_area_id', 'target_area_name',
                                'target_area_structures']
    else:
        params['dimensions'] = dimensions

    try:
        request = query.groupby(**params)
    except OSError:
        # Best effort: an OSError while querying yields no data.
        return []
    else:
        return request.result
Пример #12
0
    def test_build_filtered_aggregator(self):
        """build_aggregators() must name the aggregator wrapped by each filtered spec.

        Every input entry is a filtered aggregator; the expected output keeps
        the "filtered" wrapper and its selector filter, and puts the input key
        as "name" on the wrapped aggregator.
        """
        filter_ = filters.Filter(dimension="dim", value="val")
        agg_input = {
            "agg1":
            aggregators.filtered(filter_, aggregators.count("metric1")),
            "agg2":
            aggregators.filtered(filter_, aggregators.longsum("metric2")),
            "agg3":
            aggregators.filtered(filter_, aggregators.doublesum("metric3")),
            "agg4":
            aggregators.filtered(filter_, aggregators.doublemin("metric4")),
            "agg5":
            aggregators.filtered(filter_, aggregators.doublemax("metric5")),
            "agg6":
            aggregators.filtered(filter_, aggregators.hyperunique("metric6")),
            "agg7":
            aggregators.filtered(filter_, aggregators.cardinality("dim1")),
            "agg8":
            aggregators.filtered(
                filter_, aggregators.cardinality(["dim1", "dim2"],
                                                 by_row=True)),
            "agg9":
            aggregators.filtered(filter_, aggregators.thetasketch("dim1")),
            "agg10":
            aggregators.filtered(filter_, aggregators.thetasketch("metric7")),
            "agg11":
            aggregators.filtered(
                filter_,
                aggregators.thetasketch("metric8",
                                        isinputthetasketch=True,
                                        size=8192),
            ),
        }
        base = {
            "type": "filtered",
            "filter": {
                "type": "selector",
                "dimension": "dim",
                "value": "val"
            },
        }

        aggs = [
            {
                "name": "agg1",
                "type": "count",
                "fieldName": "metric1"
            },
            {
                "name": "agg2",
                "type": "longSum",
                "fieldName": "metric2"
            },
            {
                "name": "agg3",
                "type": "doubleSum",
                "fieldName": "metric3"
            },
            {
                "name": "agg4",
                "type": "doubleMin",
                "fieldName": "metric4"
            },
            {
                "name": "agg5",
                "type": "doubleMax",
                "fieldName": "metric5"
            },
            {
                "name": "agg6",
                "type": "hyperUnique",
                "fieldName": "metric6"
            },
            {
                "name": "agg7",
                "type": "cardinality",
                "fieldNames": ["dim1"],
                "byRow": False,
            },
            {
                "name": "agg8",
                "type": "cardinality",
                "fieldNames": ["dim1", "dim2"],
                "byRow": True,
            },
            {
                "name": "agg9",
                "type": "thetaSketch",
                "fieldName": "dim1",
                "isInputThetaSketch": False,
                "size": 16384,
            },
            {
                "name": "agg10",
                "type": "thetaSketch",
                "fieldName": "metric7",
                "isInputThetaSketch": False,
                "size": 16384,
            },
            {
                "name": "agg11",
                "type": "thetaSketch",
                "fieldName": "metric8",
                "isInputThetaSketch": True,
                "size": 8192,
            },
        ]
        expected = []
        for agg in aggs:
            exp = deepcopy(base)
            exp["aggregator"] = agg
            expected.append(exp)

        get_inner = itemgetter("aggregator")
        get_name = itemgetter("name")

        def inner_name(spec):
            # Sort key: the name carried by the wrapped aggregator.
            return get_name(get_inner(spec))

        # Sort both sides by name so the comparison is order-independent, and
        # compare the hand-written expectation with what was really built
        # (previously `expected` was overwritten with the built output, which
        # made the assertion compare the output with itself).
        actual = sorted(aggregators.build_aggregators(agg_input),
                        key=inner_name)
        expected = sorted(expected, key=inner_name)
        assert expected == actual
Пример #13
0
    def test_build_filtered_aggregator(self):
        """build_aggregators() must name the aggregator wrapped by each filtered spec.

        Every input entry is a filtered aggregator; the expected output keeps
        the 'filtered' wrapper and its selector filter, and puts the input key
        as 'name' on the wrapped aggregator.
        """
        filter_ = filters.Filter(dimension='dim', value='val')
        agg_input = {
            'agg1': aggregators.filtered(filter_,
                                         aggregators.count('metric1')),
            'agg2': aggregators.filtered(filter_,
                                         aggregators.longsum('metric2')),
            'agg3': aggregators.filtered(filter_,
                                         aggregators.doublesum('metric3')),
            'agg4': aggregators.filtered(filter_,
                                         aggregators.doublemin('metric4')),
            'agg5': aggregators.filtered(filter_,
                                         aggregators.doublemax('metric5')),
            'agg6': aggregators.filtered(filter_,
                                         aggregators.hyperunique('metric6')),
            'agg7': aggregators.filtered(filter_,
                                         aggregators.cardinality('dim1')),
            'agg8': aggregators.filtered(filter_,
                                         aggregators.cardinality(['dim1', 'dim2'], by_row=True)),
            'agg9': aggregators.filtered(filter_,
                                         aggregators.thetasketch('dim1')),
            'agg10': aggregators.filtered(filter_,
                                          aggregators.thetasketch('metric7')),
            'agg11': aggregators.filtered(filter_,
                                          aggregators.thetasketch('metric8', isinputthetasketch=True, size=8192)),
        }
        base = {
            'type': 'filtered',
            'filter': {
                'type': 'selector',
                'dimension': 'dim',
                'value': 'val'
            }
        }

        aggs = [
            {'name': 'agg1', 'type': 'count', 'fieldName': 'metric1'},
            {'name': 'agg2', 'type': 'longSum', 'fieldName': 'metric2'},
            {'name': 'agg3', 'type': 'doubleSum', 'fieldName': 'metric3'},
            {'name': 'agg4', 'type': 'doubleMin', 'fieldName': 'metric4'},
            {'name': 'agg5', 'type': 'doubleMax', 'fieldName': 'metric5'},
            {'name': 'agg6', 'type': 'hyperUnique', 'fieldName': 'metric6'},
            {'name': 'agg7', 'type': 'cardinality', 'fieldNames': ['dim1'], 'byRow': False},
            {'name': 'agg8', 'type': 'cardinality', 'fieldNames': ['dim1', 'dim2'], 'byRow': True},
            {'name': 'agg9', 'type': 'thetaSketch', 'fieldName': 'dim1', 'isInputThetaSketch': False, 'size': 16384},
            {'name': 'agg10', 'type': 'thetaSketch', 'fieldName': 'metric7', 'isInputThetaSketch': False, 'size': 16384},
            {'name': 'agg11', 'type': 'thetaSketch', 'fieldName': 'metric8', 'isInputThetaSketch': True, 'size': 8192},
        ]
        expected = []
        for agg in aggs:
            exp = deepcopy(base)
            exp['aggregator'] = agg
            expected.append(exp)

        get_inner = itemgetter('aggregator')
        get_name = itemgetter('name')

        def inner_name(spec):
            # Sort key: the name carried by the wrapped aggregator.
            return get_name(get_inner(spec))

        # Sort both sides by name so the comparison is order-independent, and
        # compare the hand-written expectation with what was really built
        # (previously `expected` was overwritten with the built output, which
        # made the assertion compare the output with itself).
        actual = sorted(aggregators.build_aggregators(agg_input),
                        key=inner_name)
        expected = sorted(expected, key=inner_name)
        assert expected == actual