def test_parse_array(self):
    """An array<...> column parses to an 'array' node carrying an 'item' sub-type."""
    col_name = "array"
    col_type = "array<string>"
    col_comment = "test_parse_array"
    expected = {
        "name": col_name,
        "type": "array",
        "comment": col_comment,
        "item": {"type": "string"},
    }
    assert_equal(parser.parse_column(col_name, col_type, col_comment), expected)
def test_parse_simple(self):
    """A primitive column type passes through unchanged with name and comment."""
    col_name = "simple"
    col_type = "string"
    col_comment = "test_parse_simple"
    parse_tree = parser.parse_column(col_name, col_type, col_comment)
    assert_equal(parse_tree, {"name": col_name, "type": col_type, "comment": col_comment})
def autocomplete(request, database=None, table=None, column=None, nested=None):
  """Return metadata for the requested level of the DB object hierarchy.

  Depending on which arguments are supplied, the response lists databases,
  the tables of a database, the columns of a table, or the parsed type tree
  of a single column (optionally drilled into a nested sub-type).

  Raises nothing to the caller for backend timeouts: those are reported as
  a 503 code inside the response dict instead.
  """
  app_name = get_app_name(request)
  query_server = get_query_server_config(app_name)

  # Superusers (or users holding the 'impersonate' permission) may act on
  # behalf of another user via the ?doas= query parameter.
  do_as = request.user
  if (request.user.is_superuser or request.user.has_hue_permission(action="impersonate", app="security")) and 'doas' in request.GET:
    do_as = User.objects.get(username=request.GET.get('doas'))

  db = dbms.get(do_as, query_server)

  response = {}
  try:
    if database is None:
      response['databases'] = db.get_databases()
    elif table is None:
      response['tables'] = db.get_tables(database=database)
    elif column is None:
      tbl = db.get_table(database, table)
      response['hdfs_link'] = tbl.hdfs_link
      # Loop variable renamed: the original comprehension shadowed the
      # 'column' parameter (and leaked over it under Python 2 scoping).
      response['columns'] = [col.name for col in tbl.cols]
      response['extended_columns'] = massage_columns_for_json(tbl.cols)
    else:
      col = db.get_column(database, table, column)
      if col:
        parse_tree = parser.parse_column(col.name, col.type, col.comment)
        if nested:
          parse_tree = _extract_nested_type(parse_tree, nested)
        response = parse_tree
      else:
        raise Exception('Could not find column `%s`.`%s`.`%s`' % (database, table, column))
  except (QueryServerTimeoutException, TTransportException) as e:
    # Backend unreachable or timed out: report 503 rather than crashing.
    # 'except X as e' and str(e) replace the Python-2-only ', e' / e.message.
    response['code'] = 503
    response['error'] = str(e)

  # Fix: the original built the response dict but never returned it.
  return response
def _autocomplete(db, database=None, table=None, column=None, nested=None):
  """Build the autocomplete metadata response for a db/table/column path.

  Kudu-backed tables (detected through their output-format property) are
  only queryable from Impala, so their column details are expanded through
  an Impala connection.
  """
  response = {}
  try:
    if database is None:
      response['databases'] = db.get_databases()
    elif table is None:
      response['tables_meta'] = db.get_tables_meta(database=database)
    elif column is None:
      table = db.get_table(database, table)
      response['hdfs_link'] = table.hdfs_link
      response['comment'] = table.comment
      cols_extended = massage_columns_for_json(table.cols)

      # Kudu tables can only be queried through Impala.
      if 'org.apache.kudu.mapreduce.KuduTableOutputFormat' in str(table.properties):
        table.is_impala_only = True
      if table.is_impala_only:
        # Expand Kudu column information via a dedicated Impala connection.
        query_server = get_query_server_config('impala')
        db = dbms.get(db.client.user, query_server)
        col_options = db.get_table_describe(database, table.name)
        # dict comprehension replaces the dict([( … )]) construction.
        extra_col_options = {col[0]: dict(zip(col_options.cols(), col)) for col in col_options.rows()}
        for col_props in cols_extended:
          col_props.update(extra_col_options.get(col_props['name'], {}))

      response['support_updates'] = table.is_impala_only
      response['columns'] = [col.name for col in table.cols]
      response['extended_columns'] = cols_extended
      response['is_view'] = table.is_view
      response['partition_keys'] = [{'name': part.name, 'type': part.type} for part in table.partition_keys]
    else:
      col = db.get_column(database, table, column)
      if col:
        parse_tree = parser.parse_column(col.name, col.type, col.comment)
        if nested:
          parse_tree = _extract_nested_type(parse_tree, nested)
        response = parse_tree
        # If column or nested type is scalar/primitive, add sample of values.
        if parser.is_scalar_type(parse_tree['type']):
          sample = _get_sample_data(db, database, table, column)
          if 'rows' in sample:
            response['sample'] = sample['rows']
      else:
        raise Exception('Could not find column `%s`.`%s`.`%s`' % (database, table, column))
  except (QueryServerTimeoutException, TTransportException) as e:
    # py3-compatible 'as e' / str(e) replace ', e' / e.message.
    response['code'] = 503
    response['error'] = str(e)

  # Fix: the original built the response dict but never returned it.
  return response
def test_parse_map(self):
    """A map<k,v> column exposes separate 'key' and 'value' sub-types."""
    col_name = "map"
    col_type = "map<string,int>"
    col_comment = "test_parse_map"
    expected = {
        "name": col_name,
        "type": "map",
        "comment": col_comment,
        "key": {"type": "string"},
        "value": {"type": "int"},
    }
    assert_equal(parser.parse_column(col_name, col_type, col_comment), expected)
def test_parse_map(self):
    """Map columns parse into key/value type descriptors."""
    expected = {
        'name': 'map',
        'type': 'map',
        'comment': 'test_parse_map',
        'key': {'type': 'string'},
        'value': {'type': 'int'},
    }
    parse_tree = parser.parse_column('map', 'map<string,int>', 'test_parse_map')
    assert_equal(parse_tree, expected)
def _autocomplete(db, database=None, table=None, column=None, nested=None):
  """Build autocomplete metadata for a db/table/column path.

  For scalar columns a deduplicated, sorted sample of values is attached to
  the response.
  """
  response = {}
  try:
    if database is None:
      response['databases'] = db.get_databases()
    elif table is None:
      response['tables'] = db.get_tables(database=database)
    elif column is None:
      tbl = db.get_table(database, table)
      response['hdfs_link'] = tbl.hdfs_link
      response['columns'] = [col.name for col in tbl.cols]
      response['extended_columns'] = massage_columns_for_json(tbl.cols)
      response['partition_keys'] = [{'name': part.name, 'type': part.type} for part in tbl.partition_keys]
    else:
      col = db.get_column(database, table, column)
      if col:
        parse_tree = parser.parse_column(col.name, col.type, col.comment)
        if nested:
          parse_tree = _extract_nested_type(parse_tree, nested)
        response = parse_tree
        # If column or nested type is scalar/primitive, add sample of values.
        if parser.is_scalar_type(parse_tree['type']):
          table_obj = db.get_table(database, table)
          sample = db.get_sample(database, table_obj, column, nested)
          if sample:
            # Deduplicate via a set, then sort; drops the redundant
            # list([...]) / set([...]) wrappers of the original.
            response['sample'] = sorted({row[0] for row in sample.rows()})
      else:
        raise Exception('Could not find column `%s`.`%s`.`%s`' % (database, table, column))
  except (QueryServerTimeoutException, TTransportException) as e:
    # py3-compatible 'as e' / str(e) replace ', e' / e.message.
    response['code'] = 503
    response['error'] = str(e)

  # Fix: the original built the response dict but never returned it.
  return response
def test_parse_nested_with_array(self):
    """A struct mixing scalar, array and nested-struct fields parses fully."""
    col_name = 'nested'
    col_type = ('struct<fieldname1:bigint,fieldname2:int,fieldname3:int,'
                'fieldname4:array<bigint>,fieldname5:bigint,'
                'fieldname6:array<struct<array_elem:string>>,fieldname7:string>')
    col_comment = 'test_parse_nested'
    expected = {
        'name': 'nested',
        'type': 'struct',
        'comment': 'test_parse_nested',
        'fields': [
            {'name': 'fieldname1', 'type': 'bigint'},
            {'name': 'fieldname2', 'type': 'int'},
            {'name': 'fieldname3', 'type': 'int'},
            {'name': 'fieldname4', 'type': 'array', 'item': {'type': 'bigint'}},
            {'name': 'fieldname5', 'type': 'bigint'},
            {'name': 'fieldname6', 'type': 'array',
             'item': {'type': 'struct', 'fields': [{'name': 'array_elem', 'type': 'string'}]}},
            {'name': 'fieldname7', 'type': 'string'},
        ],
    }
    assert_equal(parser.parse_column(col_name, col_type, col_comment), expected)
def autocomplete(request, database=None, table=None, column=None, nested=None):
  """Serve autocomplete metadata for databases, tables, columns or one column's type.

  The level returned is chosen by which of database/table/column is provided.
  Backend timeouts are folded into the response as code 503 rather than raised.
  """
  app_name = get_app_name(request)
  query_server = get_query_server_config(app_name)

  # ?doas= lets superusers / 'impersonate'-permission holders act as another user.
  do_as = request.user
  if (request.user.is_superuser or request.user.has_hue_permission(action="impersonate", app="security")) and 'doas' in request.GET:
    do_as = User.objects.get(username=request.GET.get('doas'))

  db = dbms.get(do_as, query_server)

  response = {}
  try:
    if database is None:
      response['databases'] = db.get_databases()
    elif table is None:
      response['tables'] = db.get_tables(database=database)
    elif column is None:
      tbl = db.get_table(database, table)
      response['hdfs_link'] = tbl.hdfs_link
      # Renamed loop variable: original comprehension shadowed the 'column'
      # parameter (leaks over it under Python 2 scoping).
      response['columns'] = [col.name for col in tbl.cols]
      response['extended_columns'] = massage_columns_for_json(tbl.cols)
    else:
      col = db.get_column(database, table, column)
      if col:
        parse_tree = parser.parse_column(col.name, col.type, col.comment)
        if nested:
          parse_tree = _extract_nested_type(parse_tree, nested)
        response = parse_tree
      else:
        raise Exception('Could not find column `%s`.`%s`.`%s`' % (database, table, column))
  except (QueryServerTimeoutException, TTransportException) as e:
    # py3-compatible 'as e' / str(e) replace the Python-2-only ', e' / e.message.
    response['code'] = 503
    response['error'] = str(e)

  # Fix: the original built the response dict but never returned it.
  return response
def test_parse_varchar(self):
    """A parameterized varchar(n) stays intact as the column type string."""
    parse_tree = parser.parse_column('varchar', 'varchar(1000)', 'test_parse_varchar')
    assert_equal(parse_tree, {'name': 'varchar', 'type': 'varchar(1000)', 'comment': 'test_parse_varchar'})
def test_parse_decimal(self):
    """decimal(p,s) is treated as one scalar type, not split on the comma."""
    parse_tree = parser.parse_column('simple', 'decimal(12,2)', 'test_parse_decimal')
    assert_equal(parse_tree, {'name': 'simple', 'type': 'decimal(12,2)', 'comment': 'test_parse_decimal'})
def _autocomplete(db, database=None, table=None, column=None, nested=None):
  """Build autocomplete metadata for the given db/table/column path.

  Returns databases, tables, a table's columns, or one column's parsed type
  tree depending on which arguments are supplied.
  """
  response = {}
  try:
    if database is None:
      response['databases'] = db.get_databases()
    elif table is None:
      response['tables'] = db.get_tables(database=database)
    elif column is None:
      tbl = db.get_table(database, table)
      response['hdfs_link'] = tbl.hdfs_link
      # Renamed loop variable: original comprehension shadowed the 'column'
      # parameter (leaks over it under Python 2 scoping).
      response['columns'] = [col.name for col in tbl.cols]
      response['extended_columns'] = massage_columns_for_json(tbl.cols)
    else:
      col = db.get_column(database, table, column)
      if col:
        parse_tree = parser.parse_column(col.name, col.type, col.comment)
        if nested:
          parse_tree = _extract_nested_type(parse_tree, nested)
        response = parse_tree
      else:
        raise Exception('Could not find column `%s`.`%s`.`%s`' % (database, table, column))
  except (QueryServerTimeoutException, TTransportException) as e:
    # py3-compatible 'as e' / str(e) replace ', e' / e.message.
    response['code'] = 503
    response['error'] = str(e)

  # Fix: the original built the response dict but never returned it.
  return response
def test_parse_array(self):
    """array<string> parses to an 'array' node with a string 'item'."""
    expected = {
        'name': 'array',
        'type': 'array',
        'comment': 'test_parse_array',
        'item': {'type': 'string'},
    }
    assert_equal(parser.parse_column('array', 'array<string>', 'test_parse_array'), expected)
def test_parse_simple(self):
    """A plain scalar type is returned as-is with its name and comment."""
    col_name, col_type, col_comment = 'simple', 'string', 'test_parse_simple'
    expected = {'name': col_name, 'type': col_type, 'comment': col_comment}
    assert_equal(parser.parse_column(col_name, col_type, col_comment), expected)
def test_parse_nested(self):
    """An array of structs yields nested 'item' and 'fields' descriptors."""
    col_name = 'nested'
    col_type = 'array<struct<name:string,age:int>>'
    col_comment = 'test_parse_nested'
    expected = {
        'name': col_name,
        'type': 'array',
        'comment': col_comment,
        'item': {
            'type': 'struct',
            'fields': [{'name': 'name', 'type': 'string'}, {'name': 'age', 'type': 'int'}],
        },
    }
    assert_equal(parser.parse_column(col_name, col_type, col_comment), expected)
def _autocomplete(db, database=None, table=None, column=None, nested=None):
  """Build autocomplete metadata; attach a sorted value sample for scalar columns."""
  response = {}
  try:
    if database is None:
      response['databases'] = db.get_databases()
    elif table is None:
      response['tables'] = db.get_tables(database=database)
    elif column is None:
      tbl = db.get_table(database, table)
      response['hdfs_link'] = tbl.hdfs_link
      response['columns'] = [col.name for col in tbl.cols]
      response['extended_columns'] = massage_columns_for_json(tbl.cols)
      response['partition_keys'] = [{'name': part.name, 'type': part.type} for part in tbl.partition_keys]
    else:
      col = db.get_column(database, table, column)
      if col:
        parse_tree = parser.parse_column(col.name, col.type, col.comment)
        if nested:
          parse_tree = _extract_nested_type(parse_tree, nested)
        response = parse_tree
        # If column or nested type is scalar/primitive, add sample of values.
        if parser.is_scalar_type(parse_tree['type']):
          table_obj = db.get_table(database, table)
          sample = db.get_sample(database, table_obj, column, nested)
          if sample:
            # Set comprehension deduplicates; drops the redundant
            # set([...]) / sorted(list(...)) wrappers of the original.
            response['sample'] = sorted({row[0] for row in sample.rows()})
      else:
        raise Exception('Could not find column `%s`.`%s`.`%s`' % (database, table, column))
  except (QueryServerTimeoutException, TTransportException) as e:
    # py3-compatible 'as e' / str(e) replace ', e' / e.message.
    response['code'] = 503
    response['error'] = str(e)

  # Fix: the original built the response dict but never returned it.
  return response
def test_parse_struct(self):
    """struct<...> columns expose their members under 'fields'."""
    expected = {
        'name': 'struct',
        'type': 'struct',
        'comment': 'test_parse_struct',
        'fields': [{'name': 'name', 'type': 'string'}, {'name': 'age', 'type': 'int'}],
    }
    parse_tree = parser.parse_column('struct', 'struct<name:string,age:int>', 'test_parse_struct')
    assert_equal(parse_tree, expected)
def _autocomplete(db, database=None, table=None, column=None, nested=None, query=None, cluster=None):
  """Build the autocomplete metadata response for a db/table/column path.

  A 'query' argument substitutes a SubQueryTable for a real table; Kudu-backed
  (Impala-only) tables have their column details expanded through an Impala
  connection on the given cluster. Errors are folded into the response dict
  as 'code'/'error' entries instead of being raised.
  """
  response = {}
  try:
    if database is None:
      response['databases'] = db.get_databases()
    elif table is None:
      response['tables_meta'] = db.get_tables_meta(database=database)
    elif column is None:
      if query is not None:
        table = SubQueryTable(db, query)
      else:
        table = db.get_table(database, table)
      response['hdfs_link'] = table.hdfs_link
      response['comment'] = table.comment
      cols_extended = massage_columns_for_json(table.cols)

      if table.is_impala_only and db.client.query_server['server_name'] != 'impala':
        # Expand Kudu column information through an Impala connection.
        query_server = get_query_server_config('impala', cluster=cluster)
        db = dbms.get(db.client.user, query_server, cluster=cluster)
        col_options = db.get_table_describe(database, table.name)
        extra_col_options = {col[0]: dict(zip(col_options.cols(), col)) for col in col_options.rows()}
        for col_props in cols_extended:
          col_props.update(extra_col_options.get(col_props['name'], {}))

      response['support_updates'] = table.is_impala_only
      response['columns'] = [col.name for col in table.cols]
      response['extended_columns'] = cols_extended
      response['is_view'] = table.is_view
      response['partition_keys'] = [{'name': part.name, 'type': part.type} for part in table.partition_keys]
    else:
      col = db.get_column(database, table, column)
      if col:
        parse_tree = parser.parse_column(col.name, col.type, col.comment)
        if nested:
          parse_tree = _extract_nested_type(parse_tree, nested)
        response = parse_tree
        # If column or nested type is scalar/primitive, add sample of values.
        if parser.is_scalar_type(parse_tree['type']):
          sample = _get_sample_data(db, database, table, column, cluster=cluster)
          if 'rows' in sample:
            response['sample'] = sample['rows']
      else:
        raise Exception('Could not find column `%s`.`%s`.`%s`' % (database, table, column))
  except (QueryServerTimeoutException, TTransportException) as e:
    response['code'] = 503
    response['error'] = str(e)  # str(e): e.message raises AttributeError on Python 3
  except Exception as e:
    LOG.warning('Autocomplete data fetching error: %s' % e)  # warn() is deprecated
    response['code'] = 500
    response['error'] = str(e)

  return response
def test_parse_nested(self):
    """An array-of-struct type produces nested item/fields metadata."""
    col_name = "nested"
    col_type = "array<struct<name:string,age:int>>"
    col_comment = "test_parse_nested"
    item = {
        "type": "struct",
        "fields": [{"name": "name", "type": "string"}, {"name": "age", "type": "int"}],
    }
    expected = {"name": col_name, "type": "array", "comment": col_comment, "item": item}
    assert_equal(parser.parse_column(col_name, col_type, col_comment), expected)
def test_parse_struct(self):
    """Struct members are parsed into an ordered 'fields' list."""
    col_name = "struct"
    col_type = "struct<name:string,age:int>"
    col_comment = "test_parse_struct"
    fields = [{"name": "name", "type": "string"}, {"name": "age", "type": "int"}]
    expected = {"name": col_name, "type": "struct", "comment": col_comment, "fields": fields}
    assert_equal(parser.parse_column(col_name, col_type, col_comment), expected)
def _autocomplete(db, database=None, table=None, column=None, nested=None, query=None, cluster=None):
  """Build autocomplete metadata for a db/table/column path (cluster-aware).

  A 'query' argument substitutes a SubQueryTable for a real table; Kudu-backed
  (Impala-only) tables have their column details expanded through an Impala
  connection on the given cluster.
  """
  response = {}
  try:
    if database is None:
      response['databases'] = db.get_databases()
    elif table is None:
      response['tables_meta'] = db.get_tables_meta(database=database)
    elif column is None:
      table = SubQueryTable(db, query) if query is not None else db.get_table(database, table)
      response['hdfs_link'] = table.hdfs_link
      response['comment'] = table.comment
      cols_extended = massage_columns_for_json(table.cols)

      if table.is_impala_only and db.client.query_server['server_name'] != 'impala':
        # Expand Kudu column information through an Impala connection.
        query_server = get_query_server_config('impala', cluster=cluster)
        db = dbms.get(db.client.user, query_server, cluster=cluster)
        col_options = db.get_table_describe(database, table.name)
        # dict comprehension replaces the dict([( … )]) construction.
        extra_col_options = {col[0]: dict(zip(col_options.cols(), col)) for col in col_options.rows()}
        for col_props in cols_extended:
          col_props.update(extra_col_options.get(col_props['name'], {}))

      response['support_updates'] = table.is_impala_only
      response['columns'] = [col.name for col in table.cols]
      response['extended_columns'] = cols_extended
      response['is_view'] = table.is_view
      response['partition_keys'] = [{'name': part.name, 'type': part.type} for part in table.partition_keys]
    else:
      col = db.get_column(database, table, column)
      if col:
        parse_tree = parser.parse_column(col.name, col.type, col.comment)
        if nested:
          parse_tree = _extract_nested_type(parse_tree, nested)
        response = parse_tree
        # If column or nested type is scalar/primitive, add sample of values.
        if parser.is_scalar_type(parse_tree['type']):
          sample = _get_sample_data(db, database, table, column, cluster=cluster)
          if 'rows' in sample:
            response['sample'] = sample['rows']
      else:
        raise Exception('Could not find column `%s`.`%s`.`%s`' % (database, table, column))
  except (QueryServerTimeoutException, TTransportException) as e:
    # py3-compatible 'as e' / str(e) replace ', e' / e.message.
    response['code'] = 503
    response['error'] = str(e)

  # Fix: the original built the response dict but never returned it.
  return response
def _autocomplete(db, database=None, table=None, column=None, nested=None, query=None, cluster=None, operation='schema'):
  """Build the autocomplete metadata response.

  Depending on the arguments: function metadata ('operation'), databases,
  table listings, one table's columns/keys, or a single column's parsed type
  tree (optionally drilled into with 'nested'). Errors are folded into the
  response dict as 'code'/'error' entries instead of being raised.
  """
  response = {}

  try:
    if operation == 'functions':
      response['functions'] = _get_functions(db, database)
    elif operation == 'function':
      response['function'] = _get_function(db, database)
    elif database is None:
      response['databases'] = db.get_databases()
    elif table is None:
      tables_meta = db.get_tables_meta(database=database)
      response['tables_meta'] = tables_meta
    elif column is None:
      # A raw query can stand in for a table (autocomplete on a subquery).
      if query is not None:
        table = SubQueryTable(db, query)
      else:
        table = db.get_table(database, table)
      response['hdfs_link'] = table.hdfs_link
      response['comment'] = table.comment
      cols_extended = massage_columns_for_json(table.cols)

      if table.is_impala_only:
        # Expand Kudu table information: switch to an Impala connection if the
        # current one is not already Impala.
        if db.client.query_server['dialect'] != 'impala':
          query_server = get_query_server_config('impala', connector=cluster)
          db = dbms.get(db.client.user, query_server, cluster=cluster)
        # Expand columns information from DESCRIBE output.
        col_options = db.get_table_describe(database, table.name)
        extra_col_options = dict([(col[0], dict(list(zip(col_options.cols(), col)))) for col in col_options.rows()])
        for col_props in cols_extended:
          col_props.update(extra_col_options.get(col_props['name'], {}))
        # Primary keys come from the DESCRIBE flags, until IMPALA-8291.
        primary_keys = [col['name'] for col in extra_col_options.values() if col.get('primary_key') == 'true']
        foreign_keys = []  # Not supported yet
      else:
        primary_keys = [pk.name for pk in table.primary_keys]
        foreign_keys = table.foreign_keys

      response['support_updates'] = table.is_impala_only
      response['columns'] = [column.name for column in table.cols]
      response['extended_columns'] = cols_extended
      response['is_view'] = table.is_view
      response['partition_keys'] = [{'name': part.name, 'type': part.type} for part in table.partition_keys]
      response['primary_keys'] = [{'name': pk} for pk in primary_keys]
      # NOTE(review): foreign keys are serialized as {'name', 'to'} pairs, with
      # 'to' taken from fk.type — presumably the referenced column; confirm.
      response['foreign_keys'] = [{'name': fk.name, 'to': fk.type} for fk in foreign_keys]
    else:
      col = db.get_column(database, table, column)
      if col:
        parse_tree = parser.parse_column(col.name, col.type, col.comment)
        if nested:
          parse_tree = _extract_nested_type(parse_tree, nested)
        response = parse_tree
        # If column or nested type is scalar/primitive, add sample of values.
        if parser.is_scalar_type(parse_tree['type']):
          sample = _get_sample_data(db, database, table, column, cluster=cluster)
          if 'rows' in sample:
            response['sample'] = sample['rows']
      else:
        raise Exception('Could not find column `%s`.`%s`.`%s`' % (database, table, column))
  except (QueryServerTimeoutException, TTransportException) as e:
    # Backend unavailable or timed out.
    response['code'] = 503
    response['error'] = str(e)
  except TypeError as e:
    response['code'] = 500
    response['error'] = str(e)
  except Exception as e:
    LOG.exception('Autocomplete data fetching error')
    response['code'] = 500
    response['error'] = str(e)

  return response