def test_import_druid_override_sync(self):
    datasource, dict_datasource = self.create_druid_datasource(
        'druid_override', id=ID_PREFIX + 3,
        cols_names=['col1'], metric_names=['m1'])
    imported_cluster = DruidDatasource.import_from_dict(
        db.session, dict_datasource)
    db.session.commit()
    table_over, table_over_dict = self.create_druid_datasource(
        'druid_override', id=ID_PREFIX + 3,
        cols_names=['new_col1', 'col2', 'col3'],
        metric_names=['new_metric1'])
    imported_over_cluster = DruidDatasource.import_from_dict(
        session=db.session,
        dict_rep=table_over_dict,
        sync=['metrics', 'columns'])  # sync both metrics and columns
    db.session.commit()
    imported_over = self.get_datasource(imported_over_cluster.id)
    self.assertEqual(imported_cluster.id, imported_over.id)
    expected_datasource, _ = self.create_druid_datasource(
        'druid_override', id=ID_PREFIX + 3,
        metric_names=['new_metric1'],
        cols_names=['new_col1', 'col2', 'col3'])
    self.assert_datasource_equals(expected_datasource, imported_over)

def test_get_filters_converts_strings_to_num(self):
    filtr = {'col': 'A', 'op': 'in', 'val': ['6']}
    res = DruidDatasource.get_filters([filtr], ['A'])
    self.assertEqual(6, res.filter['filter']['value'])
    filtr = {'col': 'A', 'op': '==', 'val': '6'}
    res = DruidDatasource.get_filters([filtr], ['A'])
    self.assertEqual(6, res.filter['filter']['value'])

def test_get_filters_handles_arrays_for_string_types(self):
    filtr = {'col': 'A', 'op': '==', 'val': ['a', 'b']}
    res = DruidDatasource.get_filters([filtr], [])
    self.assertEqual('a', res.filter['filter']['value'])
    filtr = {'col': 'A', 'op': '==', 'val': []}
    res = DruidDatasource.get_filters([filtr], [])
    self.assertEqual('', res.filter['filter']['value'])

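# `emplace` is a module-level helper used by the dependency-tree test below
# but not shown in this excerpt. A minimal sketch of what it is assumed to do
# (register a mock metric under its name, flagging post-aggregations via
# metric_type); the Mock-based shape is an assumption, not the original
# helper:
def emplace(metrics_dict, metric_name, is_postagg=False):
    metric = Mock()
    metric.metric_name = metric_name
    metric.metric_type = 'postagg' if is_postagg else 'metric'
    metric.json_obj = {}
    metrics_dict[metric_name] = metric
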
def test_metrics_and_post_aggs_tree(self):
    metrics = ['A', 'B', 'm1', 'm2']
    metrics_dict = {}
    for i in range(ord('A'), ord('K') + 1):
        emplace(metrics_dict, chr(i), True)
    for i in range(1, 10):
        emplace(metrics_dict, 'm' + str(i), False)

    def depends_on(index, fields):
        dependents = fields if isinstance(fields, list) else [fields]
        metrics_dict[index].json_obj = {'fieldNames': dependents}

    depends_on('A', ['m1', 'D', 'C'])
    depends_on('B', ['B', 'C', 'E', 'F', 'm3'])
    depends_on('C', ['H', 'I'])
    depends_on('D', ['m2', 'm5', 'G', 'C'])
    depends_on('E', ['H', 'I', 'J'])
    depends_on('F', ['J', 'm5'])
    depends_on('G', ['m4', 'm7', 'm6', 'A'])
    depends_on('H', ['A', 'm4', 'I'])
    depends_on('I', ['H', 'K'])
    depends_on('J', 'K')
    depends_on('K', ['m8', 'm9'])
    all_metrics, postaggs = DruidDatasource.metrics_and_post_aggs(
        metrics, metrics_dict)
    expected_metrics = set(all_metrics)
    self.assertEqual(9, len(all_metrics))
    for i in range(1, 10):
        expected_metrics.remove('m' + str(i))
    self.assertEqual(0, len(expected_metrics))
    self.assertEqual(11, len(postaggs))
    for i in range(ord('A'), ord('K') + 1):
        del postaggs[chr(i)]
    self.assertEqual(0, len(postaggs))

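# In the tree test above, A..K are post-aggregations and m1..m9 are raw
# aggregations. Resolving metrics ['A', 'B', 'm1', 'm2'] is expected to walk
# the dependency graph and pull in every reachable raw aggregation (all nine
# of m1..m9) plus every post-aggregation along the way (all eleven of A..K).
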
def test_import_druid_override_identical(self):
    datasource = self.create_druid_datasource(
        'copy_cat', id=10005,
        cols_names=['new_col1', 'col2', 'col3'],
        metric_names=['new_metric1'])
    imported_id = DruidDatasource.import_obj(datasource, import_time=1993)
    copy_datasource = self.create_druid_datasource(
        'copy_cat', id=10005,
        cols_names=['new_col1', 'col2', 'col3'],
        metric_names=['new_metric1'])
    imported_id_copy = DruidDatasource.import_obj(
        copy_datasource, import_time=1994)
    self.assertEqual(imported_id, imported_id_copy)
    self.assert_datasource_equals(
        copy_datasource, self.get_datasource(imported_id))

def test_get_filters_constructs_filter_in(self):
    filtr = {'col': 'A', 'op': 'in', 'val': ['a', 'b', 'c']}
    res = DruidDatasource.get_filters([filtr], [])
    self.assertIn('filter', res.filter)
    self.assertIn('fields', res.filter['filter'])
    self.assertEqual('or', res.filter['filter']['type'])
    self.assertEqual(3, len(res.filter['filter']['fields']))

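# For reference, an `in` filter with several values is expected to compile to
# an OR of selector filters, with an inner dict shaped roughly like this
# (a sketch of the assumed pydruid output, not quoted from it):
# {
#     'type': 'or',
#     'fields': [
#         {'type': 'selector', 'dimension': 'A', 'value': 'a'},
#         {'type': 'selector', 'dimension': 'A', 'value': 'b'},
#         {'type': 'selector', 'dimension': 'A', 'value': 'c'},
#     ],
# }
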
def create_druid_datasource(self, name, id=0, cols_names=[], metric_names=[]):
    name = '{0}{1}'.format(NAME_PREFIX, name)
    cluster_name = 'druid_test'
    params = {DBREF: id, 'database_name': cluster_name}
    dict_rep = {
        'cluster_name': cluster_name,
        'datasource_name': name,
        'id': id,
        'params': json.dumps(params),
        'columns': [{'column_name': c} for c in cols_names],
        'metrics': [{'metric_name': c} for c in metric_names],
    }
    datasource = DruidDatasource(
        id=id,
        datasource_name=name,
        cluster_name=cluster_name,
        params=json.dumps(params),
    )
    for col_name in cols_names:
        datasource.columns.append(DruidColumn(column_name=col_name))
    for metric_name in metric_names:
        datasource.metrics.append(DruidMetric(metric_name=metric_name))
    return datasource, dict_rep

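# NAME_PREFIX, ID_PREFIX and DBREF are module-level constants assumed to be
# defined near the top of this file: NAME_PREFIX namespaces test datasource
# names, ID_PREFIX offsets test ids, and DBREF names the params key that
# stores the remote id. The values below are illustrative assumptions only:
# NAME_PREFIX = 'druid_test_'
# ID_PREFIX = 20000
# DBREF = 'remote_id'
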
def test_import_druid_no_metadata(self):
    datasource, dict_datasource = self.create_druid_datasource(
        'pure_druid', id=ID_PREFIX + 1)
    imported_cluster = DruidDatasource.import_from_dict(
        db.session, dict_datasource)
    db.session.commit()
    imported = self.get_datasource(imported_cluster.id)
    self.assert_datasource_equals(datasource, imported)

def test_get_filters_constructs_bounds_filter(self):
    filtr = {'col': 'A', 'op': '>=', 'val': 'h'}
    res = DruidDatasource.get_filters([filtr], [])
    self.assertFalse(res.filter['filter']['lowerStrict'])
    self.assertEqual('A', res.filter['filter']['dimension'])
    self.assertEqual('h', res.filter['filter']['lower'])
    self.assertFalse(res.filter['filter']['alphaNumeric'])
    filtr['op'] = '>'
    res = DruidDatasource.get_filters([filtr], [])
    self.assertTrue(res.filter['filter']['lowerStrict'])
    filtr['op'] = '<='
    res = DruidDatasource.get_filters([filtr], [])
    self.assertFalse(res.filter['filter']['upperStrict'])
    self.assertEqual('h', res.filter['filter']['upper'])
    filtr['op'] = '<'
    res = DruidDatasource.get_filters([filtr], [])
    self.assertTrue(res.filter['filter']['upperStrict'])

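# The assertions above imply a Druid 'bound' filter dict shaped roughly like
# this for `>=` (a sketch; `>` flips lowerStrict to True, and `<`/`<=`
# populate 'upper'/'upperStrict' instead):
# {
#     'type': 'bound', 'dimension': 'A', 'lower': 'h',
#     'lowerStrict': False, 'alphaNumeric': False,
# }
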
def test_import_druid_2_col_2_met(self):
    datasource = self.create_druid_datasource(
        'druid_2_col_2_met', id=10003,
        cols_names=['c1', 'c2'], metric_names=['m1', 'm2'])
    imported_id = DruidDatasource.import_obj(datasource, import_time=1991)
    imported = self.get_datasource(imported_id)
    self.assert_datasource_equals(datasource, imported)

def test_get_filters_constructs_filter_not_equals(self):
    filtr = {'col': 'A', 'op': '!=', 'val': 'h'}
    res = DruidDatasource.get_filters([filtr], [])
    self.assertEqual('not', res.filter['filter']['type'])
    self.assertEqual(
        'h', res.filter['filter']['field'].filter['filter']['value'])

def test_import_druid_override(self):
    datasource = self.create_druid_datasource(
        'druid_override', id=10004,
        cols_names=['col1'], metric_names=['m1'])
    imported_id = DruidDatasource.import_obj(datasource, import_time=1991)
    table_over = self.create_druid_datasource(
        'druid_override', id=10004,
        cols_names=['new_col1', 'col2', 'col3'],
        metric_names=['new_metric1'])
    imported_over_id = DruidDatasource.import_obj(
        table_over, import_time=1992)
    imported_over = self.get_datasource(imported_over_id)
    self.assertEqual(imported_id, imported_over.id)
    expected_datasource = self.create_druid_datasource(
        'druid_override', id=10004,
        metric_names=['new_metric1', 'm1'],
        cols_names=['col1', 'new_col1', 'col2', 'col3'])
    self.assert_datasource_equals(expected_datasource, imported_over)

def test_import_druid_2_col_2_met(self):
    datasource, dict_datasource = self.create_druid_datasource(
        'druid_2_col_2_met', id=ID_PREFIX + 3,
        cols_names=['c1', 'c2'], metric_names=['m1', 'm2'])
    imported_cluster = DruidDatasource.import_from_dict(
        db.session, dict_datasource)
    db.session.commit()
    imported = self.get_datasource(imported_cluster.id)
    self.assert_datasource_equals(datasource, imported)

def test_import_druid_1_col_1_met(self):
    datasource = self.create_druid_datasource(
        'druid_1_col_1_met', id=10002,
        cols_names=['col1'], metric_names=['metric1'])
    imported_id = DruidDatasource.import_obj(datasource, import_time=1990)
    imported = self.get_datasource(imported_id)
    self.assert_datasource_equals(datasource, imported)
    self.assertEqual(
        {'remote_id': 10002, 'import_time': 1990,
         'database_name': 'druid_test'},
        json.loads(imported.params))

def test_import_druid_override_identical(self):
    datasource, dict_datasource = self.create_druid_datasource(
        'copy_cat', id=ID_PREFIX + 4,
        cols_names=['new_col1', 'col2', 'col3'],
        metric_names=['new_metric1'])
    imported = DruidDatasource.import_from_dict(
        session=db.session, dict_rep=dict_datasource)
    db.session.commit()
    copy_datasource, dict_cp_datasource = self.create_druid_datasource(
        'copy_cat', id=ID_PREFIX + 4,
        cols_names=['new_col1', 'col2', 'col3'],
        metric_names=['new_metric1'])
    imported_copy = DruidDatasource.import_from_dict(
        db.session, dict_cp_datasource)
    db.session.commit()
    self.assertEqual(imported.id, imported_copy.id)
    self.assert_datasource_equals(
        copy_datasource, self.get_datasource(imported.id))

def test_import_druid_1_col_1_met(self):
    datasource, dict_datasource = self.create_druid_datasource(
        'druid_1_col_1_met', id=ID_PREFIX + 2,
        cols_names=['col1'], metric_names=['metric1'])
    imported_cluster = DruidDatasource.import_from_dict(
        db.session, dict_datasource)
    db.session.commit()
    imported = self.get_datasource(imported_cluster.id)
    self.assert_datasource_equals(datasource, imported)
    self.assertEqual(
        {DBREF: ID_PREFIX + 2, 'database_name': 'druid_test'},
        json.loads(imported.params))

def create_druid_datasource(self, name, id=0, cols_names=[], metric_names=[]):
    params = {'remote_id': id, 'database_name': 'druid_test'}
    datasource = DruidDatasource(
        id=id,
        datasource_name=name,
        cluster_name='druid_test',
        params=json.dumps(params),
    )
    for col_name in cols_names:
        datasource.columns.append(DruidColumn(column_name=col_name))
    for metric_name in metric_names:
        datasource.metrics.append(DruidMetric(metric_name=metric_name))
    return datasource

def test_recursive_get_fields(self):
    conf = {
        'type': 'quantile',
        'fieldName': 'f1',
        'field': {
            'type': 'custom',
            'fields': [{
                'type': 'fieldAccess',
                'fieldName': 'f2',
            }, {
                'type': 'fieldAccess',
                'fieldName': 'f3',
            }, {
                'type': 'quantiles',
                'fieldName': 'f4',
                'field': {
                    'type': 'custom',
                },
            }, {
                'type': 'custom',
                'fields': [{
                    'type': 'fieldAccess',
                    'fieldName': 'f5',
                }, {
                    'type': 'fieldAccess',
                    'fieldName': 'f2',
                    'fields': [{
                        'type': 'fieldAccess',
                        'fieldName': 'f3',
                    }, {
                        'type': 'fieldIgnoreMe',
                        'fieldName': 'f6',
                    }],
                }],
            }],
        },
    }
    fields = DruidDatasource.recursive_get_fields(conf)
    expected = set(['f1', 'f2', 'f3', 'f4', 'f5'])
    self.assertEqual(5, len(fields))
    for field in fields:
        expected.remove(field)
    self.assertEqual(0, len(expected))

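# A minimal sketch of the traversal the test above pins down: collect
# 'fieldName' from node types known to reference a field, recurse into
# 'field' and 'fields', and de-duplicate. This paraphrases the behavior
# under test; it is not the production implementation.
def _recursive_get_fields_sketch(conf):
    names = []
    if conf.get('type') in ('fieldAccess', 'quantile', 'quantiles'):
        names.append(conf.get('fieldName', ''))
    if conf.get('field'):
        names += _recursive_get_fields_sketch(conf['field'])
    for nested in conf.get('fields', []):
        names += _recursive_get_fields_sketch(nested)
    return list(set(names))
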
def test_run_query_multiple_groupby(self):
    client = Mock()
    from_dttm = Mock()
    to_dttm = Mock()
    from_dttm.replace = Mock(return_value=from_dttm)
    to_dttm.replace = Mock(return_value=to_dttm)
    from_dttm.isoformat = Mock(return_value='from')
    to_dttm.isoformat = Mock(return_value='to')
    timezone = 'timezone'
    from_dttm.tzname = Mock(return_value=timezone)
    ds = DruidDatasource(datasource_name='datasource')
    metric1 = DruidMetric(metric_name='metric1')
    metric2 = DruidMetric(metric_name='metric2')
    ds.metrics = [metric1, metric2]
    col1 = DruidColumn(column_name='col1')
    col2 = DruidColumn(column_name='col2')
    ds.columns = [col1, col2]
    all_metrics = []
    post_aggs = ['some_agg']
    ds._metrics_and_post_aggs = Mock(return_value=(all_metrics, post_aggs))
    groupby = ['col1', 'col2']
    metrics = ['metric1']
    ds.get_having_filters = Mock(return_value=[])
    client.query_builder = Mock()
    client.query_builder.last_query = Mock()
    client.query_builder.last_query.query_dict = {'mock': 0}
    # multiple groupby columns should produce a groupby query,
    # not a topn or timeseries query
    ds.run_query(
        groupby, metrics, None, from_dttm, to_dttm,
        client=client, row_limit=100, filter=[])
    self.assertEqual(0, len(client.topn.call_args_list))
    self.assertEqual(1, len(client.groupby.call_args_list))
    self.assertEqual(0, len(client.timeseries.call_args_list))
    # check that the plural 'dimensions' entry lists all groupby columns
    called_args = client.groupby.call_args_list[0][1]
    self.assertIn('dimensions', called_args)
    self.assertEqual(['col1', 'col2'], called_args['dimensions'])

def __init__(self, *args, **kwargs):
    if (
            self.requires_examples and
            not os.environ.get('SOLO_TEST') and
            not os.environ.get('examples_loaded')
    ):
        logging.info('Loading examples')
        cli.load_examples(load_test_data=True)
        logging.info('Done loading examples')
        sync_role_definitions()
        os.environ['examples_loaded'] = '1'
    else:
        sync_role_definitions()
    super(RookTestCase, self).__init__(*args, **kwargs)
    self.client = app.test_client()
    self.maxDiff = None

    gamma_sqllab_role = sm.add_role('gamma_sqllab')
    for perm in sm.find_role('Gamma').permissions:
        sm.add_permission_role(gamma_sqllab_role, perm)
    db_perm = self.get_main_database(sm.get_session).perm
    security.merge_perm(sm, 'database_access', db_perm)
    db_pvm = sm.find_permission_view_menu(
        view_menu_name=db_perm, permission_name='database_access')
    gamma_sqllab_role.permissions.append(db_pvm)
    for perm in sm.find_role('sql_lab').permissions:
        sm.add_permission_role(gamma_sqllab_role, perm)

    admin = appbuilder.sm.find_user('admin')
    if not admin:
        appbuilder.sm.add_user(
            'admin', 'admin', ' user', '*****@*****.**',
            appbuilder.sm.find_role('Admin'),
            password='******')
    gamma = appbuilder.sm.find_user('gamma')
    if not gamma:
        appbuilder.sm.add_user(
            'gamma', 'gamma', 'user', '*****@*****.**',
            appbuilder.sm.find_role('Gamma'),
            password='******')
    gamma2 = appbuilder.sm.find_user('gamma2')
    if not gamma2:
        appbuilder.sm.add_user(
            'gamma2', 'gamma2', 'user', '*****@*****.**',
            appbuilder.sm.find_role('Gamma'),
            password='******')
    gamma_sqllab_user = appbuilder.sm.find_user('gamma_sqllab')
    if not gamma_sqllab_user:
        appbuilder.sm.add_user(
            'gamma_sqllab', 'gamma_sqllab', 'user', '*****@*****.**',
            gamma_sqllab_role,
            password='******')
    alpha = appbuilder.sm.find_user('alpha')
    if not alpha:
        appbuilder.sm.add_user(
            'alpha', 'alpha', 'user', '*****@*****.**',
            appbuilder.sm.find_role('Alpha'),
            password='******')
    sm.get_session.commit()

    # create druid cluster and druid datasources
    session = db.session
    cluster = (
        session.query(DruidCluster)
        .filter_by(cluster_name='druid_test')
        .first()
    )
    if not cluster:
        cluster = DruidCluster(cluster_name='druid_test')
        session.add(cluster)
        session.commit()
        druid_datasource1 = DruidDatasource(
            datasource_name='druid_ds_1',
            cluster_name='druid_test',
        )
        session.add(druid_datasource1)
        druid_datasource2 = DruidDatasource(
            datasource_name='druid_ds_2',
            cluster_name='druid_test',
        )
        session.add(druid_datasource2)
        session.commit()

def test_get_filters_ignores_in_not_in_with_empty_value(self):
    filtr1 = {'col': 'A', 'op': 'in', 'val': []}
    filtr2 = {'col': 'A', 'op': 'not in', 'val': []}
    res = DruidDatasource.get_filters([filtr1, filtr2], [])
    self.assertEqual(None, res)

def test_get_filters_constructs_equals_for_in_not_in_single_value(self):
    filtr = {'col': 'A', 'op': 'in', 'val': ['a']}
    res = DruidDatasource.get_filters([filtr], [])
    self.assertEqual('selector', res.filter['filter']['type'])

def test_get_filters_extracts_values_in_quotes(self):
    filtr = {'col': 'A', 'op': 'in', 'val': [' "a" ']}
    res = DruidDatasource.get_filters([filtr], [])
    self.assertEqual('a', res.filter['filter']['value'])

def test_run_query_single_groupby(self):
    client = Mock()
    from_dttm = Mock()
    to_dttm = Mock()
    from_dttm.replace = Mock(return_value=from_dttm)
    to_dttm.replace = Mock(return_value=to_dttm)
    from_dttm.isoformat = Mock(return_value='from')
    to_dttm.isoformat = Mock(return_value='to')
    timezone = 'timezone'
    from_dttm.tzname = Mock(return_value=timezone)
    ds = DruidDatasource(datasource_name='datasource')
    metric1 = DruidMetric(metric_name='metric1')
    metric2 = DruidMetric(metric_name='metric2')
    ds.metrics = [metric1, metric2]
    col1 = DruidColumn(column_name='col1')
    col2 = DruidColumn(column_name='col2')
    ds.columns = [col1, col2]
    all_metrics = ['metric1']
    post_aggs = ['some_agg']
    ds._metrics_and_post_aggs = Mock(return_value=(all_metrics, post_aggs))
    groupby = ['col1']
    metrics = ['metric1']
    ds.get_having_filters = Mock(return_value=[])
    client.query_builder.last_query.query_dict = {'mock': 0}
    # client.topn is called twice: once for the pre-query that finds the
    # top dimension values, once for the final query
    ds.run_query(
        groupby, metrics, None, from_dttm, to_dttm,
        timeseries_limit=100, client=client, order_desc=True, filter=[])
    self.assertEqual(2, len(client.topn.call_args_list))
    self.assertEqual(0, len(client.groupby.call_args_list))
    self.assertEqual(0, len(client.timeseries.call_args_list))
    # check that there is no plural 'dimensions' entry, only 'dimension'
    called_args_pre = client.topn.call_args_list[0][1]
    self.assertNotIn('dimensions', called_args_pre)
    self.assertIn('dimension', called_args_pre)
    called_args = client.topn.call_args_list[1][1]
    self.assertIn('dimension', called_args)
    self.assertEqual('col1', called_args['dimension'])
    # without order_desc, a groupby query is used instead of topn
    client = Mock()
    client.query_builder.last_query.query_dict = {'mock': 0}
    ds.run_query(
        groupby, metrics, None, from_dttm, to_dttm,
        client=client, order_desc=False, filter=[], row_limit=100)
    self.assertEqual(0, len(client.topn.call_args_list))
    self.assertEqual(1, len(client.groupby.call_args_list))
    self.assertEqual(0, len(client.timeseries.call_args_list))
    self.assertIn('dimensions', client.groupby.call_args_list[0][1])
    self.assertEqual(['col1'],
                     client.groupby.call_args_list[0][1]['dimensions'])
    # order_desc with a timeseries limit and a dimension spec: topn is
    # called with the plain 'dimension' for the pre-query and with the
    # full spec for the final query
    spec = {'outputName': 'hello', 'dimension': 'matcho'}
    spec_json = json.dumps(spec)
    col3 = DruidColumn(column_name='col3', dimension_spec_json=spec_json)
    ds.columns.append(col3)
    groupby = ['col3']
    client = Mock()
    client.query_builder.last_query.query_dict = {'mock': 0}
    ds.run_query(
        groupby, metrics, None, from_dttm, to_dttm,
        client=client, order_desc=True, timeseries_limit=5,
        filter=[], row_limit=100)
    self.assertEqual(2, len(client.topn.call_args_list))
    self.assertEqual(0, len(client.groupby.call_args_list))
    self.assertEqual(0, len(client.timeseries.call_args_list))
    self.assertIn('dimension', client.topn.call_args_list[0][1])
    self.assertIn('dimension', client.topn.call_args_list[1][1])
    # plain dimension for the pre-query, full spec for the final query
    self.assertEqual('matcho',
                     client.topn.call_args_list[0][1]['dimension'])
    self.assertEqual(spec, client.topn.call_args_list[1][1]['dimension'])

def test_import_druid_no_metadata(self):
    datasource = self.create_druid_datasource('pure_druid', id=10001)
    imported_id = DruidDatasource.import_obj(datasource, import_time=1989)
    imported = self.get_datasource(imported_id)
    self.assert_datasource_equals(datasource, imported)

def test_get_filters_composes_multiple_filters(self):
    filtr1 = {'col': 'A', 'op': '!=', 'val': 'y'}
    filtr2 = {'col': 'B', 'op': 'in', 'val': ['a', 'b', 'c']}
    res = DruidDatasource.get_filters([filtr1, filtr2], [])
    self.assertEqual('and', res.filter['filter']['type'])
    self.assertEqual(2, len(res.filter['filter']['fields']))

def test_get_filters_constructs_regex_filter(self):
    filtr = {'col': 'A', 'op': 'regex', 'val': '[abc]'}
    res = DruidDatasource.get_filters([filtr], [])
    self.assertEqual('regex', res.filter['filter']['type'])
    self.assertEqual('[abc]', res.filter['filter']['pattern'])
    self.assertEqual('A', res.filter['filter']['dimension'])

def test_get_filters_ignores_invalid_filter_objects(self):
    filtr = {'col': 'col1', 'op': '=='}  # missing 'val' key
    filters = [filtr]
    self.assertEqual(None, DruidDatasource.get_filters(filters, []))

def test_metrics_and_post_aggs(self):
    """
    Test generation of metrics and post-aggregations from an initial list
    of rook metrics (which may include the results of either). This
    primarily tests that specifying a post-aggregator metric will also
    require the raw aggregation of the associated druid metric column.
    """
    metrics_dict = {
        'unused_count': DruidMetric(
            metric_name='unused_count',
            verbose_name='COUNT(*)',
            metric_type='count',
            json=json.dumps({'type': 'count', 'name': 'unused_count'}),
        ),
        'some_sum': DruidMetric(
            metric_name='some_sum',
            verbose_name='SUM(*)',
            metric_type='sum',
            json=json.dumps({'type': 'sum', 'name': 'sum'}),
        ),
        'a_histogram': DruidMetric(
            metric_name='a_histogram',
            verbose_name='APPROXIMATE_HISTOGRAM(*)',
            metric_type='approxHistogramFold',
            json=json.dumps(
                {'type': 'approxHistogramFold', 'name': 'a_histogram'},
            ),
        ),
        'aCustomMetric': DruidMetric(
            metric_name='aCustomMetric',
            verbose_name='MY_AWESOME_METRIC(*)',
            metric_type='aCustomType',
            json=json.dumps(
                {'type': 'customMetric', 'name': 'aCustomMetric'},
            ),
        ),
        'quantile_p95': DruidMetric(
            metric_name='quantile_p95',
            verbose_name='P95(*)',
            metric_type='postagg',
            json=json.dumps({
                'type': 'quantile',
                'probability': 0.95,
                'name': 'p95',
                'fieldName': 'a_histogram',
            }),
        ),
        'aCustomPostAgg': DruidMetric(
            metric_name='aCustomPostAgg',
            verbose_name='CUSTOM_POST_AGG(*)',
            metric_type='postagg',
            json=json.dumps({
                'type': 'customPostAgg',
                'name': 'aCustomPostAgg',
                'field': {
                    'type': 'fieldAccess',
                    'fieldName': 'aCustomMetric',
                },
            }),
        ),
    }

    # a plain aggregation resolves to itself, with no post-aggregations
    metrics = ['some_sum']
    all_metrics, post_aggs = DruidDatasource.metrics_and_post_aggs(
        metrics, metrics_dict)
    assert all_metrics == ['some_sum']
    assert post_aggs == {}

    # a postagg pulls in the aggregation it reads via 'fieldName'
    metrics = ['quantile_p95']
    all_metrics, post_aggs = DruidDatasource.metrics_and_post_aggs(
        metrics, metrics_dict)
    result_postaggs = set(['quantile_p95'])
    assert all_metrics == ['a_histogram']
    assert set(post_aggs.keys()) == result_postaggs

    # same for a postagg that reads its input via a nested 'field'
    metrics = ['aCustomPostAgg']
    all_metrics, post_aggs = DruidDatasource.metrics_and_post_aggs(
        metrics, metrics_dict)
    result_postaggs = set(['aCustomPostAgg'])
    assert all_metrics == ['aCustomMetric']
    assert set(post_aggs.keys()) == result_postaggs

def test_get_filters_constructs_filter_equals(self):
    filtr = {'col': 'A', 'op': '==', 'val': 'h'}
    res = DruidDatasource.get_filters([filtr], [])
    self.assertEqual('selector', res.filter['filter']['type'])
    self.assertEqual('A', res.filter['filter']['dimension'])
    self.assertEqual('h', res.filter['filter']['value'])