def test_aggregation(self, vector):
    """Run one aggregate function over one column type, plain and DISTINCT.

    The aggregate function, column data type, exec options and table format
    are all read from ``vector``. Each query must return exactly one row,
    which is passed to ``self.verify_agg_result``. Unless the exec options
    set ``disable_codegen``, the runtime profile of each query is also checked
    with ``assert_codegen_enabled``.
    """
    options = vector.get_value('exec_option')
    codegen_disabled = options['disable_codegen']
    data_type = vector.get_value('data_type')
    agg_func = vector.get_value('agg_func')
    substitutions = (agg_func, data_type)

    # Plain (non-DISTINCT) aggregation, run with the vector's table format.
    plain_query = (
        'select %s(%s_col) from alltypesagg where day is not null' % substitutions)
    plain_result = self.execute_query(
        plain_query, options, table_format=vector.get_value('table_format'))
    assert len(plain_result.data) == 1
    self.verify_agg_result(agg_func, data_type, False, plain_result.data[0])
    if not codegen_disabled:
        # Verify codegen was enabled for the preaggregation.
        # It is deliberately disabled for the merge aggregation.
        assert_codegen_enabled(plain_result.runtime_profile, [1])

    # DISTINCT aggregation.
    # NOTE(review): unlike the plain query, this one is executed without a
    # table_format argument - confirm that this is intentional.
    distinct_query = (
        'select %s(DISTINCT(%s_col)) from alltypesagg where day is not null'
        % substitutions)
    distinct_result = self.execute_query(
        distinct_query, vector.get_value('exec_option'))
    assert len(distinct_result.data) == 1
    self.verify_agg_result(agg_func, data_type, True, distinct_result.data[0])
    if not codegen_disabled:
        # Verify codegen was enabled for all stages of the aggregation.
        assert_codegen_enabled(distinct_result.runtime_profile, [1, 2, 4, 6])
def test_group_concat(self, vector):
    """Check group_concat(distinct ...) results by comparing them as sets.

    These checks are done directly in Python because the order in which
    results are merged at the final, single-node aggregation step is
    non-deterministic (if the first phase is running on multiple nodes).
    Every concatenated value is therefore split on its delimiter and the
    resulting items are compared without regard to order.
    """
    options = vector.get_value('exec_option')
    codegen_disabled = options['disable_codegen']
    table_format = vector.get_value('table_format')

    # Test group_concat distinct with other aggregate function and groupings.
    # expected result is the row: 2010,'1, 2, 3, 4','1-2-3-4','2|3|1|4',40,4
    query = ("select year, group_concat(distinct string_col), "
             "group_concat(distinct string_col, '-'), "
             "group_concat(distinct string_col, '|'), "
             "count(string_col), count(distinct string_col) "
             "from alltypesagg where int_col < 5 and year = 2010 group by year")
    result = self.execute_query(query, options, table_format=table_format)
    fields = result.data[0].split("\t")
    assert len(fields) == 6
    assert fields[0] == '2010'
    # Columns 1..3 hold the same distinct values joined by different delimiters.
    for separator, concatenated in zip([', ', '-', '|'], fields[1:4]):
        assert set(concatenated.split(separator)) == {'1', '2', '3', '4'}
    assert fields[4] == '40'
    assert fields[5] == '4'
    verify_codegen = not (codegen_disabled or USING_OLD_AGGS_JOINS)
    if verify_codegen:
        # Verify codegen was enabled for all three stages of the aggregation.
        assert_codegen_enabled(result.runtime_profile, [1, 2, 4])

    # Test group_concat distinct with arrow delimiter, with multiple rows
    query = ('select day, group_concat(distinct string_col, "->") from '
             '(select * from alltypesagg where id % 100 = day '
             'order by id limit 99999) a '
             'group by day order by day')
    result = self.execute_query(query, options, table_format=table_format)
    # Expected items for day d are the strings d, d + 100, ..., d + 900.
    expected_by_day = [
        set(str(day + 100 * step) for step in range(10))
        for day in range(1, 11)
    ]
    assert len(result.data) == 10
    for day, (line, expected_items) in enumerate(
            zip(result.data, expected_by_day), 1):
        fields = line.split("\t")
        assert len(fields) == 2
        assert fields[0] == str(day)
        assert set(fields[1].split("->")) == expected_items

    # Test group_concat distinct with merge node
    query = ("select group_concat(distinct string_col, ' ') "
             "from alltypesagg where int_col < 10")
    result = self.execute_query(query, options, table_format=table_format)
    assert set(result.data[0].split(" ")) == set(str(n) for n in range(1, 10))
    if verify_codegen:
        # Verify codegen was enabled for all four stages of the aggregation.
        assert_codegen_enabled(result.runtime_profile, [1, 2, 4, 6])
def test_aggregation(self, vector):
    """Run one aggregate function over one column type, plain and DISTINCT.

    The aggregate function, column data type, exec options and table format
    are all read from ``vector``. Each query must return exactly one row,
    which is passed to ``self.verify_agg_result``. The runtime profile is
    checked with ``assert_codegen_enabled`` only when codegen is not disabled
    in the exec options and ``USING_OLD_AGGS_JOINS`` is false.
    """
    options = vector.get_value('exec_option')
    codegen_disabled = options['disable_codegen']
    # The old aggregation node does not support codegen for all aggregate functions.
    verify_codegen = not (codegen_disabled or USING_OLD_AGGS_JOINS)
    data_type = vector.get_value('data_type')
    agg_func = vector.get_value('agg_func')
    substitutions = (agg_func, data_type)

    # Plain (non-DISTINCT) aggregation, run with the vector's table format.
    plain_query = (
        'select %s(%s_col) from alltypesagg where day is not null' % substitutions)
    plain_result = self.execute_query(
        plain_query, options, table_format=vector.get_value('table_format'))
    assert len(plain_result.data) == 1
    self.verify_agg_result(agg_func, data_type, False, plain_result.data[0])
    if verify_codegen:
        # Verify codegen was enabled for the preaggregation.
        # It is deliberately disabled for the merge aggregation.
        assert_codegen_enabled(plain_result.runtime_profile, [1])

    # DISTINCT aggregation.
    # NOTE(review): unlike the plain query, this one is executed without a
    # table_format argument - confirm that this is intentional.
    distinct_query = (
        'select %s(DISTINCT(%s_col)) from alltypesagg where day is not null'
        % substitutions)
    distinct_result = self.execute_query(
        distinct_query, vector.get_value('exec_option'))
    assert len(distinct_result.data) == 1
    self.verify_agg_result(agg_func, data_type, True, distinct_result.data[0])
    if verify_codegen:
        # Verify codegen was enabled for all stages of the aggregation.
        assert_codegen_enabled(distinct_result.runtime_profile, [1, 2, 4, 6])
def test_group_concat(self, vector):
    """Check group_concat(distinct ...) results by comparing them as sets.

    These checks are done directly in Python because the order in which
    results are merged at the final, single-node aggregation step is
    non-deterministic (if the first phase is running on multiple nodes).
    Every concatenated value is therefore split on its delimiter and the
    resulting items are compared without regard to order.
    """
    options = vector.get_value('exec_option')
    codegen_disabled = options['disable_codegen']
    table_format = vector.get_value('table_format')

    # Test group_concat distinct with other aggregate function and groupings.
    # expected result is the row: 2010,'1, 2, 3, 4','1-2-3-4','2|3|1|4',40,4
    query = ("select year, group_concat(distinct string_col), "
             "group_concat(distinct string_col, '-'), "
             "group_concat(distinct string_col, '|'), "
             "count(string_col), count(distinct string_col) "
             "from alltypesagg where int_col < 5 and year = 2010 group by year")
    result = self.execute_query(query, options, table_format=table_format)
    fields = result.data[0].split("\t")
    assert len(fields) == 6
    assert fields[0] == '2010'
    # Columns 1..3 hold the same distinct values joined by different delimiters.
    for separator, concatenated in zip([', ', '-', '|'], fields[1:4]):
        assert set(concatenated.split(separator)) == {'1', '2', '3', '4'}
    assert fields[4] == '40'
    assert fields[5] == '4'
    if not codegen_disabled:
        # Verify codegen was enabled for all three stages of the aggregation.
        assert_codegen_enabled(result.runtime_profile, [1, 2, 4])

    # Test group_concat distinct with arrow delimiter, with multiple rows
    query = ('select day, group_concat(distinct string_col, "->") from '
             '(select * from alltypesagg where id % 100 = day '
             'order by id limit 99999) a '
             'group by day order by day')
    result = self.execute_query(query, options, table_format=table_format)
    # Expected items for day d are the strings d, d + 100, ..., d + 900.
    expected_by_day = [
        set(str(day + 100 * step) for step in range(10))
        for day in range(1, 11)
    ]
    assert len(result.data) == 10
    for day, (line, expected_items) in enumerate(
            zip(result.data, expected_by_day), 1):
        fields = line.split("\t")
        assert len(fields) == 2
        assert fields[0] == str(day)
        assert set(fields[1].split("->")) == expected_items

    # Test group_concat distinct with merge node
    query = ("select group_concat(distinct string_col, ' ') "
             "from alltypesagg where int_col < 10")
    result = self.execute_query(query, options, table_format=table_format)
    assert set(result.data[0].split(" ")) == set(str(n) for n in range(1, 10))
    if not codegen_disabled:
        # Verify codegen was enabled for all four stages of the aggregation.
        assert_codegen_enabled(result.runtime_profile, [1, 2, 4, 6])
def test_select_node_codegen(self, vector):
    """Check that codegen is reported as enabled for the select node.

    Runs a filtered select over a limited subquery, requires the runtime
    profile to contain exec options for node 1, and then asserts codegen was
    enabled for that node. ``vector`` is accepted but not used here.
    """
    query = ('select * from (select * from functional.alltypes limit 1000000) t1 '
             'where int_col > 10 limit 10')
    result = self.execute_query(query)
    node_exec_options = get_node_exec_options(result.runtime_profile, 1)
    # Make sure test fails if there are no exec options in the profile for the node
    assert len(node_exec_options) > 0
    assert_codegen_enabled(result.runtime_profile, [1])
def test_select_node_codegen(self, vector):
    """Check that codegen is reported as enabled for the select node.

    Runs a filtered select over a limited subquery, requires the runtime
    profile to contain exec options for node 1, and then asserts codegen was
    enabled for that node. ``vector`` is accepted but not used here.
    """
    query = ('select * from (select * from functional.alltypes limit 1000000) t1 '
             'where int_col > 10 limit 10')
    result = self.execute_query(query)
    node_exec_options = get_node_exec_options(result.runtime_profile, 1)
    # Make sure test fails if there are no exec options in the profile for the node
    assert len(node_exec_options) > 0
    assert_codegen_enabled(result.runtime_profile, [1])