def __fetch_results(self):
    """Wait for every pending concurrent query and validate its result.

    Each entry of ``self.config_concurrent`` that is not yet flagged ``done``
    is polled through ``self.bc.status``. As soon as a token is reported
    ready, the entry is flagged, its result is fetched with ``self.bc.fetch``
    and handed to ``runTest.run_query`` as ``blz_result`` together with the
    per-test settings stored on the entry. ``self.config_concurrent`` is
    replaced by an empty dict before returning.
    """
    # Wait only on entries that are not done yet. Comparing a completion
    # counter against the size of the whole dict never terminates when an
    # entry is already flagged as done on entry.
    pending = [
        token_name
        for token_name, config in self.config_concurrent.items()
        if not config.done
    ]

    # NOTE(review): bc.status is polled in a tight loop with no back-off.
    while pending:
        still_waiting = []
        for token_name in pending:
            config = self.config_concurrent[token_name]
            if not self.bc.status(config.token):
                still_waiting.append(token_name)
                continue

            config.done = True
            print("==>> Fetch result for ", token_name)
            result_gdf = self.bc.fetch(config.token)

            configTest = config.configTest
            if configTest.compare_with == "drill":
                engine = self.drill
            else:
                engine = self.spark

            runTest.run_query(
                self.bc,
                engine,
                config.query,
                config.test_name,
                self.name,
                configTest.apply_order,
                configTest.order_by_col,
                configTest.acceptable_difference,
                configTest.use_percentage,
                config.fileSchemaType,
                print_result=configTest.print_result,
                query_spark=configTest.spark_query,
                comparing=configTest.comparing,
                message_validation=configTest.message_validation,
                blz_result=result_gdf,
            )
        pending = still_waiting

    self.config_concurrent = {}
def run_queries(bc, dask_client, nRals, drill, dir_data_lc, tables, **kwargs):
    """Run every query produced by ``samples`` through ``runTest.run_query``.

    Args:
        bc: passed to ``samples`` and to ``runTest.run_query``.
        dask_client: passed to ``samples``.
        nRals: passed to ``samples``.
        drill: engine argument given to ``runTest.run_query``.
        dir_data_lc: not used by this function.
        tables: forwarded to ``samples`` as ``table_names``.
        **kwargs: optional ``sql_table_filter_map``,
            ``sql_table_batch_size_map`` and ``sql_connection``; each is
            forwarded to ``samples`` (defaults: ``{}``, ``{}``, ``None``).

    In ``ExecutionMode.GENERATOR`` the loop stops as soon as a sample reports
    a file schema type different from the previous one.
    """
    sql_table_filter_map = kwargs.get("sql_table_filter_map", {})
    sql_table_batch_size_map = kwargs.get("sql_table_batch_size_map", {})
    sql = kwargs.get("sql_connection", None)

    print("######## Starting queries ...########")

    extra_args = {
        "table_names": tables,
        "init_tables": True,
        "ds_types": data_types,
        "sql_table_filter_map": sql_table_filter_map,
        "sql_table_batch_size_map": sql_table_batch_size_map,
        "sql_connection": sql,
    }

    # Schema type of the sample handled last; starts at the first data type.
    current_schema_type = data_types[0]

    for sampleId, query, queryId, fileSchemaType in samples(
            bc, dask_client, nRals, **extra_args):
        datasource_done = fileSchemaType != current_schema_type
        if datasource_done and Settings.execution_mode == ExecutionMode.GENERATOR:
            print("==============================")
            break

        print("==>> Run query for sample", sampleId)
        runTest.run_query(
            bc,
            drill,
            query,
            queryId,
            queryType,
            worder,
            "",
            acceptable_difference,
            use_percentage,
            fileSchemaType,
            print_result=True,
        )
        current_schema_type = fileSchemaType
def run_queries(bc, dask_client, nRals, drill, spark, dir_data_lc, tables, **kwargs):
    """Run every sample yielded by ``sample_items`` through ``runTest.run_query``.

    Args:
        bc: passed to ``sample_items``; also used for ``bc.explain`` and
            given to ``runTest.run_query``.
        dask_client: passed to ``sample_items``.
        nRals: passed to ``sample_items``.
        drill: engine used when the sample's ``use_pyspark`` is falsy.
        spark: engine used when the sample's ``use_pyspark`` is truthy.
        dir_data_lc: forwarded to ``sample_items`` as ``dir_data_lc``.
        tables: forwarded to ``sample_items`` as ``table_names``.
        **kwargs: optional ``sql_table_filter_map``,
            ``sql_table_batch_size_map`` and ``sql_connection``; each is
            forwarded to ``sample_items`` (defaults: ``{}``, ``{}``, ``None``).

    In ``ExecutionMode.GENERATOR`` the loop stops as soon as a sample reports
    a file schema type different from the previous one.
    """
    sql_table_filter_map = kwargs.get("sql_table_filter_map", {})
    sql_table_batch_size_map = kwargs.get("sql_table_batch_size_map", {})
    sql = kwargs.get("sql_connection", None)

    print("######## Starting queries ...########")

    extra_args = {
        "table_names": tables,
        "init_tables": True,
        "ds_types": data_types,
        "sql_table_filter_map": sql_table_filter_map,
        "sql_table_batch_size_map": sql_table_batch_size_map,
        "sql_connection": sql,
        "dir_data_lc": dir_data_lc,
    }

    # Schema type of the sample handled last; starts at the first data type.
    current_schema_type = data_types[0]

    for sampleUID, sampleId, fileSchemaType, datasource_tables in sample_items(
            bc, dask_client, nRals, **extra_args):
        datasource_done = fileSchemaType != current_schema_type
        if datasource_done and Settings.execution_mode == ExecutionMode.GENERATOR:
            print("==============================")
            break

        sample = samples[sampleId]

        # map to tables with datasource info: order_csv, nation_csv ...
        query = sample.table_mapper(sample.query, datasource_tables)

        worder = sample.worder
        use_percentage = sample.use_percentage
        acceptable_difference = sample.acceptable_difference
        engine = spark if sample.use_pyspark else drill

        # map to tables without datasource info: order, nation ...
        query_spark = sample.table_mapper(sample.query)

        print("==>> Run query for sample", sampleId)
        print("PLAN:")
        print(bc.explain(query, True))
        runTest.run_query(
            bc,
            engine,
            query,
            sampleId,
            queryType,
            worder,
            "",
            acceptable_difference,
            use_percentage,
            fileSchemaType,
            query_spark=query_spark,
            print_result=True,
        )
        current_schema_type = fileSchemaType
def __executionTest(self):
    """Create the tables for each configured data type and run its test cases.

    For every entry of ``self.configLocal.data_types`` that is not skipped by
    ``self.__skip_test``, the tables are created with
    ``createSchema.create_tables`` and each test case in ``self.data`` is
    resolved to a query and handed to ``runTest.run_query``. The engine is
    ``self.drill`` when the test case's ``compare_with`` is ``"drill"``,
    otherwise ``self.spark``.
    """
    listCase = list(self.data.keys())

    print("######## Starting queries ...########")

    for fileSchemaType in self.configLocal.data_types:
        if self.__skip_test(fileSchemaType, self.configLocal):
            continue

        createSchema.create_tables(
            self.bc,
            self.dir_data_file,
            fileSchemaType,
            tables=list(self.tables),
        )

        for test_name in listCase:
            test_case = self.data[test_name]

            # NOTE(review): as written, GENERATOR mode leaves this loop
            # before any query of the data type is run -- confirm intended.
            if Settings.execution_mode == ExecutionMode.GENERATOR:
                print("==============================")
                break

            configTest = self.__loadTestCaseConfig(test_name, fileSchemaType)
            if self.__skip_test(fileSchemaType, configTest):
                continue

            query = self.__getQuery(test_case)
            if configTest.compare_with == "drill":
                engine = self.drill
            else:
                engine = self.spark

            print("==>> Run query for test case", self.name)

            # The plan is printed only when no message validation is set.
            if configTest.message_validation == "":
                print("PLAN:")
                print(self.bc.explain(query, True))

            runTest.run_query(
                self.bc,
                engine,
                query,
                test_name,
                self.name,
                configTest.apply_order,
                configTest.order_by_col,
                configTest.acceptable_difference,
                configTest.use_percentage,
                fileSchemaType,
                print_result=configTest.print_result,
                query_spark=configTest.spark_query,
                comparing=configTest.comparing,
                message_validation=configTest.message_validation,
            )
def executionTest(queryType):
    """Run the ROUND queries for every data type that is not skipped.

    For each data type the tables are created with ``cs.create_tables`` and
    TEST_01..TEST_05 are handed, in order, to ``runTest.run_query`` with
    ``drill`` as the engine argument. In ``ExecutionMode.GENERATOR`` the loop
    stops after the first data type that was actually run.
    """
    tables = ["nation", "region", "customer", "orders", "lineitem"]
    data_types = [
        DataType.DASK_CUDF,
        DataType.CUDF,
        DataType.CSV,
        DataType.ORC,
        DataType.PARQUET,
    ]  # TODO json

    # Parameter to indicate if it is necessary to order the result sets
    # before comparing them.
    worder = 1
    use_percentage = False
    acceptable_difference = 0

    # (queryId, query) pairs, executed in this order.
    cases = (
        (
            'TEST_01',
            "select ROUND(orders.o_orderkey), ROUND(orders.o_totalprice) "
            "from customer left outer join orders "
            "on customer.c_custkey = orders.o_custkey "
            "where customer.c_nationkey = 3 and customer.c_custkey < 500",
        ),
        (
            'TEST_02',
            "select ROUND(orders.o_totalprice, 2), ROUND(orders.o_totalprice, -2) "
            "from customer left outer join orders "
            "on customer.c_custkey = orders.o_custkey "
            "where customer.c_nationkey = 3 and customer.c_custkey < 500",
        ),
        (
            'TEST_03',
            "select customer.c_custkey, orders.o_orderkey, ROUND(orders.o_custkey,0) "
            "from customer left outer join orders "
            "on customer.c_custkey = orders.o_custkey "
            "where customer.c_nationkey = 3 and customer.c_custkey < 500",
        ),
        (
            'TEST_04',
            "select MAX(ROUND(n1.n_regionkey,3)) "
            "from nation as n1 full outer join nation as n2 "
            "on n1.n_nationkey = n2.n_nationkey + 6 ",
        ),
        # WSM NEED TO REVISIT THIS
        (
            'TEST_05',
            "select ROUND(AVG(o_totalprice)) from orders",
        ),
    )

    for fileSchemaType in data_types:
        if skip_test(dask_client, nRals, fileSchemaType, queryType):
            continue

        # Create tables
        cs.create_tables(bc, dir_data_file, fileSchemaType, tables=tables)

        # Run queries
        print('==============================')
        print(queryType + " Tests")
        print('==============================')

        for queryId, query in cases:
            runTest.run_query(bc, drill, query, queryId, queryType, worder, '',
                              acceptable_difference, use_percentage, fileSchemaType)

        if Settings.execution_mode == ExecutionMode.GENERATOR:
            print("==============================")
            break
def executionTest():
    """Run the full-outer-join queries for every data type that is not skipped.

    For each data type the ``nation`` table is created with
    ``cs.create_tables`` and TEST_01..TEST_04 are handed, in order, to
    ``runTest.run_query`` with ``drill`` as the engine argument. In
    ``ExecutionMode.GENERATOR`` the loop stops after the first data type that
    was actually run.
    """
    tables = ["nation"]
    data_types = [
        DataType.DASK_CUDF,
        DataType.CUDF,
        DataType.CSV,
        DataType.ORC,
        DataType.PARQUET,
    ]  # TODO json

    # Parameter to indicate if it is necessary to order the result sets
    # before comparing them.
    worder = 1
    use_percentage = False
    acceptable_difference = 0

    # (queryId, query) pairs, executed in this order.
    cases = (
        (
            'TEST_01',
            "select n1.n_nationkey as n1key, n2.n_nationkey as n2key, "
            "n1.n_nationkey + n2.n_nationkey "
            "from nation as n1 full outer join nation as n2 "
            "on n1.n_nationkey = n2.n_nationkey + 6",
        ),
        (
            'TEST_02',
            "select n1.n_nationkey as n1key, n2.n_nationkey as n2key, "
            "n1.n_nationkey + n2.n_nationkey "
            "from nation as n1 full outer join nation as n2 "
            "on n1.n_nationkey = n2.n_nationkey + 6 "
            "where n1.n_nationkey < 10",
        ),
        (
            'TEST_03',
            "select n1.n_nationkey as n1key, n2.n_nationkey as n2key, "
            "n1.n_nationkey + n2.n_nationkey "
            "from nation as n1 full outer join nation as n2 "
            "on n1.n_nationkey = n2.n_nationkey + 6 "
            "where n1.n_nationkey < 10 and n1.n_nationkey > 5",
        ),
        (
            'TEST_04',
            "select n1.n_nationkey as n1key, n2.n_nationkey as n2key, "
            "n1.n_nationkey + n2.n_nationkey "
            "from nation as n1 full outer join nation as n2 "
            "on n1.n_nationkey = n2.n_nationkey + 6 "
            "and n1.n_nationkey + 1 = n2.n_nationkey + 7 "
            "and n1.n_nationkey + 2 = n2.n_nationkey + 8",
        ),
    )

    for fileSchemaType in data_types:
        if skip_test(dask_client, nRals, fileSchemaType, queryType):
            continue

        # Create tables
        cs.create_tables(bc, dir_data_file, fileSchemaType, tables=tables)

        # Run queries
        print('==============================')
        print(queryType)
        print('==============================')

        for queryId, query in cases:
            runTest.run_query(bc, drill, query, queryId, queryType, worder, '',
                              acceptable_difference, use_percentage, fileSchemaType)

        if Settings.execution_mode == ExecutionMode.GENERATOR:
            print("==============================")
            break
def fetch_result(bc, tokens, sampleId, drill, query, queryId, fileSchemaType):
    """Fetch and validate one sample's result if its token is ready.

    Looks up ``tokens[sampleId]`` and asks ``bc.status`` about it. When the
    status is falsy nothing else happens and ``False`` is returned. Otherwise
    the result is fetched with ``bc.fetch``, passed to ``runTest.run_query``
    as ``blz_result``, and ``True`` is returned.
    """
    token = tokens[sampleId]

    # Not ready yet: report that to the caller without touching anything.
    if not bc.status(token):
        return False

    print("==>> Fetch result for sample", sampleId)
    fetched = bc.fetch(token)
    runTest.run_query(
        bc,
        drill,
        query,
        queryId,
        queryType,
        worder,
        "",
        acceptable_difference,
        use_percentage,
        fileSchemaType,
        blz_result=fetched,
    )
    return True
def executionTest():
    """Run the GROUP BY (no aggregation) queries for every data type not skipped.

    For each data type the tables are created with ``cs.create_tables`` and
    TEST_01..TEST_05 are handed, in order, to ``runTest.run_query`` with
    ``drill`` as the engine argument. TEST_04 passes ``0`` instead of
    ``worder``. In ``ExecutionMode.GENERATOR`` the loop stops after the first
    data type that was actually run.
    """
    tables = ["nation", "region", "customer"]
    data_types = [
        DataType.DASK_CUDF,
        DataType.CUDF,
        DataType.CSV,
        DataType.ORC,
        DataType.PARQUET,
    ]  # TODO json

    # Parameter to indicate if it is necessary to order the result sets
    # before comparing them.
    worder = 1
    use_percentage = False
    acceptable_difference = 0

    # (queryId, ordering flag, query) triples, executed in this order.
    cases = (
        (
            'TEST_01',
            worder,
            "select n_nationkey, n_regionkey from nation "
            "group by n_regionkey, n_nationkey",
        ),
        (
            'TEST_02',
            worder,
            "select c_custkey, c_nationkey from customer "
            "where c_acctbal < 1000 group by c_nationkey, c_custkey",
        ),
        (
            'TEST_03',
            worder,
            "select c.c_custkey, r.r_regionkey, "
            "c.c_custkey + r.r_regionkey as addy "
            "from customer as c inner join region as r "
            "on c.c_nationkey = r.r_regionkey "
            "group by r.r_regionkey, c.c_custkey",
        ),
        (
            'TEST_04',
            0,
            "select c_nationkey, c_custkey from customer "
            "where c_acctbal < 10000 group by c_nationkey, c_custkey "
            "order by c_nationkey desc, c_custkey asc",
        ),
        (
            'TEST_05',
            worder,
            "select c.c_custkey, r.r_regionkey, "
            "c.c_custkey + r.r_regionkey as addy "
            "from customer as c inner join region as r "
            "on c.c_nationkey = r.r_regionkey "
            "where c.c_acctbal < 1000 "
            "group by r.r_regionkey, c.c_custkey "
            "order by c.c_custkey desc",
        ),
    )

    for fileSchemaType in data_types:
        if skip_test(dask_client, nRals, fileSchemaType, queryType):
            continue

        # Create tables
        cs.create_tables(bc, dir_data_file, fileSchemaType, tables=tables)

        # Run queries
        print('==============================')
        print(queryType)
        print('==============================')

        for queryId, order_flag, query in cases:
            runTest.run_query(bc, drill, query, queryId, queryType, order_flag, '',
                              acceptable_difference, use_percentage, fileSchemaType)

        if Settings.execution_mode == ExecutionMode.GENERATOR:
            print("==============================")
            break
def executionTest():
    """Run the nested-subquery queries for every data type that is not skipped.

    For each data type the tables are created with ``cs.create_tables`` and
    TEST_01..TEST_04 are handed, in order, to ``runTest.run_query`` with
    ``drill`` as the engine argument and an acceptable difference of 0.01.
    In ``ExecutionMode.GENERATOR`` the loop stops after the first data type
    that was actually run.
    """
    tables = ["nation", "region", "customer", "orders", "part", "partsupp", "supplier"]
    data_types = [
        DataType.DASK_CUDF,
        DataType.CUDF,
        DataType.CSV,
        DataType.ORC,
        DataType.PARQUET,
    ]  # TODO json

    # Parameter to indicate if it is necessary to order the result sets
    # before comparing them.
    worder = 1
    use_percentage = False
    acceptable_difference = 0.01

    # (queryId, query) pairs, executed in this order.
    cases = (
        (
            'TEST_01',
            "select maxPrice, avgSize from "
            "(select avg(CAST(p_size AS DOUBLE)) as avgSize, "
            "max(p_retailprice) as maxPrice, "
            "min(p_retailprice) as minPrice from part ) as partAnalysis "
            "order by maxPrice, avgSize",
        ),
        (
            'TEST_02',
            "select custOrders.avgPrice, custOrders.numOrders "
            "from customer inner join "
            "(select o_custkey as o_custkey, avg(o_totalprice) as avgPrice, "
            "count(o_totalprice) as numOrders from orders "
            "where o_custkey <= 100 group by o_custkey) as custOrders "
            "on custOrders.o_custkey = customer.c_custkey "
            "where customer.c_nationkey <= 5",
        ),
        (
            'TEST_03',
            "select partSuppTemp.partKey, partAnalysis.avgSize from "
            "(select min(p_partkey) as partKey, "
            "avg(CAST(p_size AS DOUBLE)) as avgSize, "
            "max(p_retailprice) as maxPrice, "
            "min(p_retailprice) as minPrice from part ) as partAnalysis "
            "inner join "
            "(select ps_partkey as partKey, ps_suppkey as suppKey "
            "from partsupp where ps_availqty > 2) as partSuppTemp "
            "on partAnalysis.partKey = partSuppTemp.partKey "
            "inner join "
            "(select s_suppkey as suppKey from supplier ) as supplierTemp "
            "on supplierTemp.suppKey = partSuppTemp.suppKey",
        ),
        (
            'TEST_04',
            "select avg(CAST(custKey AS DOUBLE)) from "
            "(select customer.c_custkey as custKey from "
            "(select min(o_custkey) as o_custkey from orders ) as tempOrders "
            "inner join customer "
            "on tempOrders.o_custkey = customer.c_custkey "
            "where customer.c_nationkey > 6) as joinedTables",
        ),
    )

    for fileSchemaType in data_types:
        if skip_test(dask_client, nRals, fileSchemaType, queryType):
            continue

        # Create tables
        cs.create_tables(bc, dir_data_file, fileSchemaType, tables=tables)

        # Run queries
        print('==============================')
        print(queryType)
        print('==============================')

        for queryId, query in cases:
            runTest.run_query(bc, drill, query, queryId, queryType, worder, '',
                              acceptable_difference, use_percentage, fileSchemaType)

        if Settings.execution_mode == ExecutionMode.GENERATOR:
            print("==============================")
            break
def executionTest():
    """Run the ORDER BY queries for every data type that is not skipped.

    For each data type the ``customer`` table is created with
    ``cs.create_tables`` and TEST_01..TEST_04 are handed, in order, to
    ``runTest.run_query`` with ``drill`` as the engine argument, ``worder``
    set to 0 and an acceptable difference of 0.01. In
    ``ExecutionMode.GENERATOR`` the loop stops after the first data type that
    was actually run.
    """
    tables = ["customer"]
    data_types = [
        DataType.DASK_CUDF,
        DataType.CUDF,
        DataType.CSV,
        DataType.ORC,
        DataType.PARQUET,
    ]  # TODO json

    # Parameter to indicate if it is necessary to order the result sets
    # before comparing them.
    worder = 0
    use_percentage = False
    acceptable_difference = 0.01

    # (queryId, query) pairs, executed in this order.
    cases = (
        (
            'TEST_01',
            "select c_custkey, c_acctbal from customer "
            "order by c_acctbal desc, c_custkey",
        ),
        (
            'TEST_02',
            "select c_acctbal from customer order by c_acctbal",
        ),
        (
            'TEST_03',
            "select c_custkey, c_nationkey, c_acctbal from customer "
            "order by c_nationkey, c_acctbal, c_custkey desc",
        ),
        (
            'TEST_04',
            "select c_custkey + c_nationkey, c_acctbal from customer "
            "order by 1 desc, 2",
        ),
    )

    for fileSchemaType in data_types:
        if skip_test(dask_client, nRals, fileSchemaType, queryType):
            continue

        # Create tables
        cs.create_tables(bc, dir_data_file, fileSchemaType, tables=tables)

        # Run queries
        print('==============================')
        print(queryType)
        print('==============================')

        for queryId, query in cases:
            runTest.run_query(bc, drill, query, queryId, queryType, worder, '',
                              acceptable_difference, use_percentage, fileSchemaType)

        if Settings.execution_mode == ExecutionMode.GENERATOR:
            print("==============================")
            break
def executionTest():
    """Run the LIKE queries for every data type that is not skipped.

    Only ``DataType.CUDF`` is listed. The tables are created from
    ``dir_data_lc`` with ``cs.create_tables`` and TEST_01..TEST_03 are handed,
    in order, to ``runTest.run_query`` with ``drill`` as the engine argument
    and an acceptable difference of 0.01. In ``ExecutionMode.GENERATOR`` the
    loop stops after the first data type that was actually run.
    """
    tables = ['supplier', 'lineitem', 'partsupp', 'part']
    data_types = [DataType.CUDF]  # TODO csv orc parquet json

    # Parameter to indicate if it is necessary to order the result sets
    # before comparing them.
    worder = 1
    use_percentage = False
    acceptable_difference = 0.01

    # (queryId, query) pairs, executed in this order.
    cases = (
        (
            'TEST_01',
            "select p_partkey, p_mfgr from part "
            "where p_size = 35 and p_type like 'STEEL%'",
        ),
        (
            'TEST_02',
            "select s.s_acctbal, s.s_name, p.p_partkey, p.p_mfgr, "
            "s.s_address, s.s_phone, s.s_comment "
            "from part p "
            "inner join partsupp ps on ps.ps_partkey = p.p_partkey "
            "inner join supplier s on s.s_suppkey = ps.ps_suppkey "
            "where p.p_type like '%STEEL'",
        ),
        (
            'TEST_03',
            "SELECT 100.00* SUM(CASE WHEN p.p_type LIKE 'PROMO%' "
            "THEN l.l_extendedprice*(1-l.l_discount) ELSE 0 END) "
            "/ SUM(l.l_extendedprice*(1-l.l_discount)) AS promo_revenue "
            "FROM lineitem l "
            "INNER JOIN part p ON l.l_partkey = p.p_partkey "
            "WHERE l.l_shipdate < '1995-09-01'",
        ),
    )

    for fileSchemaType in data_types:
        if skip_test(dask_client, nRals, fileSchemaType, queryType):
            continue

        # Create tables
        cs.create_tables(bc, dir_data_lc, fileSchemaType, tables=tables)

        # Run queries
        print('==============================')
        print(queryType)
        print('==============================')

        for queryId, query in cases:
            runTest.run_query(bc, drill, query, queryId, queryType, worder, '',
                              acceptable_difference, use_percentage, fileSchemaType)

        if Settings.execution_mode == ExecutionMode.GENERATOR:
            print("==============================")
            break
def executionTest():
    """Run the INNER JOIN queries for every data type that is not skipped.

    For each data type the tables are created with ``cs.create_tables`` and
    TEST_01..TEST_12 are handed, in order, to ``runTest.run_query`` with
    ``drill`` as the engine argument. TEST_02, TEST_04 and TEST_05 pass an
    acceptable difference of 0.01 instead of ``acceptable_difference``;
    TEST_12 passes ``0`` instead of ``worder``. In
    ``ExecutionMode.GENERATOR`` the loop stops after the first data type that
    was actually run.
    """
    tables = [
        "nation", "region", "customer", "lineitem", "orders", "supplier"
    ]
    data_types = [
        DataType.DASK_CUDF,
        DataType.CUDF,
        DataType.CSV,
        DataType.ORC,
        DataType.PARQUET,
    ]  # TODO json

    # Parameter to indicate if it is necessary to order
    # the result sets before comparing them.
    worder = 1
    use_percentage = False
    acceptable_difference = 0

    # (queryId, ordering flag, acceptable difference, query), in run order.
    cases = (
        (
            "TEST_01",
            worder,
            acceptable_difference,
            "select nation.n_nationkey, region.r_regionkey "
            "from nation inner join region "
            "on region.r_regionkey = nation.n_nationkey",
        ),
        (
            "TEST_02",
            worder,
            0.01,
            "select avg(CAST(c.c_custkey AS DOUBLE)), "
            "avg(CAST(c.c_nationkey AS DOUBLE)), n.n_regionkey "
            "from customer as c inner join nation as n "
            "on c.c_nationkey = n.n_nationkey "
            "group by n.n_regionkey",
        ),
        (
            "TEST_03",
            worder,
            acceptable_difference,
            "select c.c_custkey, c.c_nationkey, n.n_regionkey "
            "from customer as c inner join nation as n "
            "on c.c_nationkey = n.n_nationkey "
            "where n.n_regionkey = 1 and c.c_custkey < 50",
        ),
        (
            "TEST_04",
            worder,
            0.01,
            "select avg(CAST(c.c_custkey AS DOUBLE)), avg(c.c_acctbal), "
            "n.n_nationkey, r.r_regionkey "
            "from customer as c "
            "inner join nation as n on c.c_nationkey = n.n_nationkey "
            "inner join region as r on r.r_regionkey = n.n_regionkey "
            "group by n.n_nationkey, r.r_regionkey",
        ),
        (
            "TEST_05",
            worder,
            0.01,
            "select n1.n_nationkey as supp_nation, "
            "n2.n_nationkey as cust_nation, "
            "l.l_extendedprice * l.l_discount "
            "from supplier as s "
            "inner join lineitem as l on s.s_suppkey = l.l_suppkey "
            "inner join orders as o on o.o_orderkey = l.l_orderkey "
            "inner join customer as c on c.c_custkey = o.o_custkey "
            "inner join nation as n1 on s.s_nationkey = n1.n_nationkey "
            "inner join nation as n2 on c.c_nationkey = n2.n_nationkey "
            "where n1.n_nationkey = 1 and n2.n_nationkey = 2 "
            "and o.o_orderkey < 10000",
        ),
        (
            "TEST_06",
            worder,
            acceptable_difference,
            "SELECT n.n_nationkey + 1, n.n_regionkey "
            "from nation AS n inner join region AS r "
            "ON n.n_regionkey = r.r_regionkey",
        ),
        (
            "TEST_07",
            worder,
            acceptable_difference,
            "SELECT n.n_nationkey + 1, n.n_regionkey "
            "from nation AS n INNER JOIN region AS r "
            "ON n.n_regionkey = r.r_regionkey and n.n_nationkey = 5",
        ),
        (
            "TEST_08",
            worder,
            acceptable_difference,
            "select * from nation n1 inner join nation n2 "
            "on n1.n_nationkey = n2.n_nationkey",
        ),
        (
            "TEST_09",
            worder,
            acceptable_difference,
            "select n1.n_nationkey, n2.n_nationkey "
            "from nation n1 inner join nation n2 "
            "on n1.n_nationkey = n2.n_nationkey",
        ),
        (
            "TEST_10",
            worder,
            acceptable_difference,
            "select l.l_orderkey, l.l_linenumber, n.n_nationkey "
            "from lineitem as l inner join nation as n "
            "on l.l_orderkey = n.n_nationkey",
        ),
        (
            "TEST_11",
            worder,
            acceptable_difference,
            "select c.c_custkey, c.c_nationkey, n.o_orderkey "
            "from customer as c inner join orders as n "
            "on c.c_custkey = n.o_custkey "
            "where n.o_orderkey < 100",
        ),
        (
            "TEST_12",
            0,
            acceptable_difference,
            "select c.c_custkey, c.c_nationkey, o.o_orderkey "
            "from customer as c "
            "inner join orders as o on c.c_custkey = o.o_custkey "
            "inner join nation as n on c.c_nationkey = n.n_nationkey "
            "order by c_custkey, o.o_orderkey",
        ),
    )

    # Disabled cases, kept for reference.
    #
    # ERROR: Different values GDF and PSV
    # queryId = "TEST_13"
    # query = """select c.c_name, o.o_orderkey, o.o_totalprice,
    #         l.l_partkey, l.l_returnflag
    #     from lineitem as l
    #     inner join orders as o on o.o_orderkey = l.l_orderkey
    #     inner join customer as c on c.c_custkey = o.o_custkey
    #     and l.l_linenumber < 3 and c.c_custkey < 30"""
    # runTest.run_query(bc, drill, query, queryId, queryType, worder, "",
    #                   acceptable_difference, use_percentage,
    #                   fileSchemaType, print_result=True)
    #
    # ERROR: Different values GDF and PSV
    # queryId = "TEST_14"
    # query = """select o.o_orderkey, o.o_totalprice, l.l_partkey
    #     from lineitem as l
    #     inner join orders as o on o.o_orderkey = l.l_orderkey * 2
    #     inner join customer as c on c.c_nationkey = o.o_custkey"""
    # runTest.run_query(bc, drill, query, queryId, queryType, worder, "",
    #                   acceptable_difference, use_percentage,
    #                   fileSchemaType, print_result=True)

    for fileSchemaType in data_types:
        if skip_test(dask_client, nRals, fileSchemaType, queryType):
            continue

        # Create tables
        cs.create_tables(bc, dir_data_file, fileSchemaType, tables=tables)

        # Run queries
        print("==============================")
        print(queryType)
        print("==============================")

        for queryId, order_flag, tolerance, query in cases:
            runTest.run_query(
                bc,
                drill,
                query,
                queryId,
                queryType,
                order_flag,
                "",
                tolerance,
                use_percentage,
                fileSchemaType,
            )

        if Settings.execution_mode == ExecutionMode.GENERATOR:
            print("==============================")
            break
def executionTest():
    """Run the CAST queries for every data type that is not skipped.

    For each data type the tables are created from ``dir_data_lc`` with
    ``cs.create_tables`` and TEST_01..TEST_12 are handed, in order, to
    ``runTest.run_query``. TEST_08 and TEST_09 use ``spark`` as the engine
    argument; TEST_05 and TEST_07 pass ``0`` for the ordering flag and
    ``True`` for the percentage flag. TEST_11 uses ``spark`` for
    ``DataType.ORC`` and ``drill`` otherwise; TEST_12 is not run for
    ``DataType.ORC``.
    """
    tables = ['nation', 'region', 'supplier', 'customer', 'lineitem', 'orders', 'part']
    data_types = [
        DataType.CUDF,
        DataType.CSV,
        DataType.ORC,
        DataType.PARQUET,
    ]  # TODO parquet json

    # Parameter to indicate if it is necessary to order the result sets
    # before comparing them.
    worder = 1
    use_percentage = False
    acceptable_difference = 0.01

    # (queryId, run on spark?, ordering flag, percentage flag, query).
    # The engine is resolved at call time, not when this table is built.
    cases = (
        (
            'TEST_01', False, worder, use_percentage,
            "select p_partkey, p_retailprice, "
            "cast(cast(p_retailprice as VARCHAR) as DOUBLE) "
            "from part order by p_partkey limit 10",
        ),
        (
            'TEST_02', False, worder, use_percentage,
            "select CAST(c_custkey as BIGINT), c_acctbal from customer "
            "order by c_acctbal desc, c_custkey",
        ),
        (
            'TEST_03', False, worder, use_percentage,
            "select SUM(c_custkey), CAST(c_custkey as VARCHAR) "
            "from customer where c_custkey between 123 and 125 "
            "group by c_custkey",
        ),
        (
            'TEST_04', False, worder, use_percentage,
            "select cast(o_totalprice AS DOUBLE) * o_orderkey "
            "from orders where o_orderkey between 990 and 1010",
        ),
        (
            'TEST_05', False, 0, True,
            "select o_custkey, o_orderkey, "
            "cast(o_custkey AS FLOAT) * o_orderkey "
            "from orders where o_custkey between 998 and 1000 "
            "order by o_custkey, o_orderkey",
        ),
        (
            'TEST_06', False, worder, use_percentage,
            "select cast(c_nationkey AS INTEGER) from customer "
            "where c_custkey < 100 and c_nationkey in (19, 20)",
        ),
        (
            'TEST_07', False, 0, True,
            "select cast(o_orderkey AS FLOAT) * o_totalprice "
            "from orders where o_orderkey < 10 order by o_orderkey",
        ),
        (
            'TEST_08', True, worder, use_percentage,
            "select cast(o_orderkey AS TINYINT) from orders "
            "where o_orderkey < 120",
        ),
        (
            'TEST_09', True, worder, use_percentage,
            "select cast(o_orderkey AS SMALLINT) from orders "
            "where o_orderkey < 32000",
        ),
        (
            'TEST_10', False, worder, use_percentage,
            "select cast(o_totalprice AS INTEGER) * o_orderkey "
            "from orders where o_orderkey < 10",
        ),
    )

    for fileSchemaType in data_types:
        if skip_test(dask_client, nRals, fileSchemaType, queryType):
            continue

        # Create tables
        cs.create_tables(bc, dir_data_lc, fileSchemaType, tables=tables)

        # Run queries
        print('==============================')
        print(queryType)
        print('==============================')

        for queryId, on_spark, order_flag, percentage, query in cases:
            engine = spark if on_spark else drill
            runTest.run_query(bc, engine, query, queryId, queryType, order_flag, '',
                              acceptable_difference, percentage, fileSchemaType)

        # TEST_11 picks its engine from the data type being tested.
        queryId = 'TEST_11'
        query = (
            "select cast(o_orderdate AS TIMESTAMP) from orders "
            "where o_orderkey < 10"
        )
        engine = spark if fileSchemaType == DataType.ORC else drill
        runTest.run_query(bc, engine, query, queryId, queryType, worder, '',
                          acceptable_difference, use_percentage, fileSchemaType)

        # TODO: FIx cast(o_orderdate AS DATE) when fileSchemaType is ORC
        queryId = 'TEST_12'
        query = (
            "select cast(o_orderdate AS TIMESTAMP) from orders "
            "where cast(o_orderdate as TIMESTAMP) "
            "between '1995-01-01' and '1995-01-05'"
        )
        if fileSchemaType != DataType.ORC:
            runTest.run_query(bc, drill, query, queryId, queryType, worder, '',
                              acceptable_difference, use_percentage, fileSchemaType)
def executionTest():
    """Run the WITH-clause queries for every data type that is not skipped.

    For each data type the tables are created with ``cs.create_tables`` and
    TEST_01..TEST_03 are handed, in order, to ``runTest.run_query`` with
    ``drill`` as the engine argument. In ``ExecutionMode.GENERATOR`` the loop
    stops after the first data type that was actually run.
    """
    tables = [
        "nation",
        "region",
        "customer",
        "orders",
        "part",
        "partsupp",
        "supplier",
    ]
    data_types = [
        DataType.DASK_CUDF,
        DataType.CUDF,
        DataType.CSV,
        DataType.ORC,
        DataType.PARQUET,
    ]  # TODO json

    # Parameter to indicate if it is necessary to order
    # the result sets before comparing them.
    worder = 1
    use_percentage = False
    acceptable_difference = 0

    # (queryId, query) pairs, executed in this order.
    cases = (
        (
            "TEST_01",
            "with nationTemp as "
            "(select n_nationkey, n_regionkey as fkey "
            "from nation where n_nationkey > 3 "
            "order by n_nationkey) "
            "select region.r_regionkey, nationTemp.n_nationkey "
            "from region inner join nationTemp "
            "on region.r_regionkey = nationTemp.fkey",
        ),
        (
            "TEST_02",
            "with regionTemp as ( "
            "select r_regionkey from region where r_regionkey > 2 ), "
            "nationTemp as(select n_nationkey, n_regionkey as fkey "
            "from nation where n_nationkey > 3 "
            "order by n_nationkey) "
            "select regionTemp.r_regionkey, nationTemp.fkey "
            "from regionTemp inner join nationTemp "
            "on regionTemp.r_regionkey = nationTemp.fkey",
        ),
        (
            "TEST_03",
            "with ordersTemp as ( "
            "select min(o_orderkey) as priorityKey, o_custkey "
            "from orders group by o_custkey ), "
            "ordersjoin as( select orders.o_custkey from orders "
            "inner join ordersTemp "
            "on ordersTemp.priorityKey = orders.o_orderkey) "
            "select customer.c_custkey, customer.c_nationkey "
            "from customer inner join ordersjoin "
            "on ordersjoin.o_custkey = customer.c_custkey",
        ),
    )

    # Disabled case, kept for reference.
    #
    # queryId = 'TEST_04'
    # query = """with ordersTemp as (
    #       select min(orders.o_orderkey) as priorityKey,
    #       o_custkey from orders group by o_custkey
    #   ), ordersjoin as(
    #       select orders.o_orderkey, orders.o_custkey
    #       / (orders.o_custkey + 1) as o_custkey,
    #       (ordersTemp.priorityKey + 1) as priorityKey from orders
    #       inner join ordersTemp on
    #       (ordersTemp.priorityKey = orders.o_orderkey)
    #   )
    #   select (customer.c_custkey + 1)
    #   / (customer.c_custkey - customer.c_custkey + 1) from customer
    #   inner join ordersjoin
    #   on ordersjoin.o_custkey = customer.c_custkey
    #   where (customer.c_custkey > 1 or customer.c_custkey < 100)"""
    # runTest.run_query(bc, drill, query, queryId, queryType, worder,
    #     '', acceptable_difference, use_percentage, fileSchemaType)

    for fileSchemaType in data_types:
        if skip_test(dask_client, nRals, fileSchemaType, queryType):
            continue

        # Create tables
        cs.create_tables(bc, dir_data_file, fileSchemaType, tables=tables)

        # Run queries
        print("==============================")
        print(queryType)
        print("==============================")

        for queryId, query in cases:
            runTest.run_query(
                bc,
                drill,
                query,
                queryId,
                queryType,
                worder,
                "",
                acceptable_difference,
                use_percentage,
                fileSchemaType,
            )

        if Settings.execution_mode == ExecutionMode.GENERATOR:
            print("==============================")
            break
def executionTest():
    """Run the TO_DATE / TO_TIMESTAMP queries on ORC data against spark.

    Each case carries two SQL texts: the one passed as the main query and the
    one forwarded to ``runTest.run_query`` as ``query_spark`` (it differs in
    the cast type and in the date-format pattern).

    Relies on names from the enclosing scope: ``bc``, ``spark``,
    ``dask_client``, ``nRals``, ``dir_data_lc`` and ``queryType``.
    """
    tables = [
        "orders",
        "lineitem",
    ]
    data_types = [DataType.ORC]

    # (queryId, SQL, spark SQL) triples, executed in this order.
    cases = (
        (
            "TEST_01",
            "select TO_DATE(cast(o_orderdate as varchar), "
            "'%Y-%m-%d %H:%M:%S') from orders",
            "select TO_DATE(cast(o_orderdate as string), "
            "'yyyy-MM-dd HH:mm:ss') from orders",
        ),
        (
            "TEST_02",
            "select TO_TIMESTAMP(cast(o_orderdate as varchar), "
            "'%Y-%m-%d %H:%M:%S') from orders",
            "select TO_TIMESTAMP(cast(o_orderdate as string), "
            "'yyyy-MM-dd HH:mm:ss') from orders",
        ),
        (
            "TEST_03",
            "select TO_DATE( "
            "substring(cast(l_shipdate as varchar), 1, 4) || '|' || "
            "substring(cast(l_commitdate as varchar), 6, 2) || '|' || '13', "
            "'%Y|%m|%d') from lineitem",
            "select TO_DATE( "
            "substring(cast(l_shipdate as string), 1, 4) || '|' || "
            "substring(cast(l_commitdate as string), 6, 2) || '|' || '13', "
            "'yyyy|MM|dd') from lineitem",
        ),
        (
            "TEST_04",
            "select TO_TIMESTAMP( "
            "substring(cast(l_shipdate as varchar), 1, 4) || '|' || "
            "substring(cast(l_commitdate as varchar), 6, 2) || '|' || '13', "
            "'%Y|%m|%d') from lineitem",
            "select TO_TIMESTAMP( "
            "substring(cast(l_shipdate as string), 1, 4) || '|' || "
            "substring(cast(l_commitdate as string), 6, 2) || '|' || '13', "
            "'yyyy|MM|dd') from lineitem",
        ),
    )

    # Whether the result sets must be ordered before they are compared.
    worder = 1
    use_percentage = False
    acceptable_difference = 0.01

    separator = "=============================="

    for fileSchemaType in data_types:
        if skip_test(dask_client, nRals, fileSchemaType, queryType):
            continue

        # Create Tables ------------------------------------------------
        cs.create_tables(bc, dir_data_lc, fileSchemaType, tables=tables)

        # Run Query ----------------------------------------------------
        print(separator, queryType, separator, sep="\n")

        for queryId, query, query_spark in cases:
            runTest.run_query(
                bc,
                spark,
                query,
                queryId,
                queryType,
                worder,
                "",
                acceptable_difference,
                use_percentage,
                fileSchemaType,
                query_spark=query_spark,
            )
def executionTest(queryType):
    """Run an ad-hoc mix of query sections against drill for each data type.

    The context (``bc``, ``dask_client``) comes from ``init_context()``.  For
    every data type that ``skip_test`` does not reject, the tables in
    ``cs.tpchTables`` are created; then each section prints its banner (when
    it has one) and its enabled queries are handed to ``runTest.run_query``
    under the section title.  Disabled queries stay in the table with
    ``run=False`` so they can be re-enabled later; they are never executed.

    Changes relative to the previous version:

    * ``fileSchemaType`` is now passed positionally to every ``run_query``
      call.  The three calls that also pass ``print_result=True`` used to
      omit it, unlike every other call in this file.
    * Section titles no longer overwrite the ``queryType`` argument, so
      ``skip_test`` receives the caller's value for every data type instead
      of the last section title from the second data type onward.

    NOTE(review): several sections and query ids repeat (for example three
    'TEST_10' entries under 'Where clause Test').  They are kept as-is;
    confirm whether results keyed by id may collide.

    Args:
        queryType: label forwarded to ``skip_test`` for every data type.

    Relies on ``drill``, ``nRals`` and ``dir_data_file`` being available from
    the enclosing or module scope.
    """
    tables = cs.tpchTables
    data_types = [
        DataType.DASK_CUDF,
        DataType.CUDF,
        DataType.CSV,
        DataType.ORC,
        DataType.PARQUET,
    ]  # TODO json

    bc, dask_client = init_context()

    # Whether the result sets must be ordered before they are compared.
    worder = 1
    use_percentage = False
    acceptable_difference = 0.01

    def case(queryId, query, run=True, **options):
        """Describe one query; ``options`` are extra run_query keywords."""
        return queryId, query, run, options

    # Queries shared by more than one section ----------------------------
    customer_summary = (
        "select count(c_custkey), sum(c_acctbal), avg(c_acctbal), "
        "min(c_custkey), max(c_nationkey), "
        "(max(c_nationkey) + min(c_nationkey))/2 c_nationkey "
        "from customer where c_custkey < 100 group by c_nationkey"
    )
    customer_summary_cases = [case("TEST_04", customer_summary, run=False)]

    nation_count_cases = [
        case(
            "TEST_01",
            "select count(*), count(n_nationkey) from nation",
            run=False,
        ),
        case(
            "TEST_02",
            "select count(n_nationkey), count(*) from nation "
            "group by n_nationkey",
            run=False,
        ),
    ]

    null_predicate_cases = [
        case(
            "TEST_06",
            "select COUNT(n.n_nationkey), AVG(r.r_regionkey) "
            "from nation as n left outer join region as r "
            "on n.n_nationkey = r.r_regionkey WHERE n.n_regionkey IS NULL",
            print_result=True,
        ),
        case(
            "TEST_07",
            "select n.n_nationkey, n.n_name, r.r_regionkey, r.r_name "
            "from nation as n left outer join region as r "
            "on n.n_nationkey = r.r_regionkey WHERE r.r_name IS NULL",
        ),
        # Disabled: core dump when the query starts.
        case(
            "TEST_08",
            "select n.n_nationkey, n.n_name, r.r_regionkey, r.r_name "
            "from nation as n left outer join region as r "
            "on n.n_nationkey = r.r_regionkey WHERE n.n_name IS NOT NULL",
            run=False,
        ),
    ]

    union_cases = [
        case(
            "TEST_03",
            "(select o_orderkey, o_totalprice as key from orders "
            "where o_orderkey < 100) union all "
            "(select o_orderkey, o_custkey as key from orders "
            "where o_orderkey < 300 and o_orderkey >= 200)",
        ),
    ]

    acctbal_filter_cases = [
        case(
            "TEST_10",
            "select c_custkey, c_nationkey as nkey from customer "
            "where -c_nationkey + c_acctbal > 750.3",
        ),
        case(
            "TEST_11",
            "select c_custkey, c_nationkey as nkey from customer "
            "where -c_nationkey + c_acctbal > 750",
        ),
    ]

    full_join_count_cases = [
        case(
            "TEST_12",
            "select count(n1.n_nationkey) as n1key, "
            "count(n2.n_nationkey) as n2key, count(*) as cstar "
            "from nation as n1 full outer join nation as n2 "
            "on n1.n_nationkey = n2.n_nationkey + 6",
        ),
    ]

    # (section title, print banner?, cases) in execution order -----------
    sections = [
        (
            "Aggregations without group by Test",
            True,
            [
                case(
                    "TEST_07",
                    "select COUNT(n1.n_nationkey) as n1key, "
                    "COUNT(DISTINCT(n2.n_nationkey + n1.n_nationkey)) "
                    "as n2key from nation as n1 "
                    "full outer join nation as n2 "
                    "on n1.n_nationkey = n2.n_nationkey + 10",
                    run=False,
                ),
            ],
        ),
        (
            "Coalesce Test",
            True,
            [
                case(
                    "TEST_02",
                    "select COALESCE(orders.o_orderkey, 100), "
                    "COALESCE(orders.o_totalprice, 0.01) "
                    "from customer left outer join orders "
                    "on customer.c_custkey = orders.o_custkey "
                    "where customer.c_nationkey = 3 "
                    "and customer.c_custkey < 500",
                    run=False,
                ),
                case(
                    "TEST_03",
                    "select COALESCE(orders.o_orderkey, customer.c_custkey), "
                    "COALESCE(orders.o_totalprice, customer.c_acctbal) "
                    "from customer left outer join orders "
                    "on customer.c_custkey = orders.o_custkey "
                    "where customer.c_nationkey = 3 "
                    "and customer.c_custkey < 500",
                    run=False,
                ),
                # WSM: needs to be revisited.
                case(
                    "TEST_05",
                    "select COUNT(DISTINCT(COALESCE(n1.n_regionkey,32))), "
                    "AVG(COALESCE(n1.n_regionkey,32)) "
                    "from nation as n1 full outer join nation as n2 "
                    "on n1.n_nationkey = n2.n_nationkey + 6 ",
                    run=False,
                ),
                # WSM: needs to be revisited.
                case(
                    "TEST_06",
                    "select SUM(COALESCE(n2.n_nationkey, 100)), "
                    "COUNT(DISTINCT(COALESCE(n1.n_nationkey,32))), "
                    "n2.n_regionkey as n1key "
                    "from nation as n1 full outer join nation as n2 "
                    "on n1.n_nationkey = n2.n_nationkey + 6 "
                    "GROUP BY n2.n_regionkey",
                    run=False,
                ),
                case(
                    "TEST_07",
                    "select MIN(COALESCE(n.n_nationkey, r.r_regionkey)), "
                    "MAX(COALESCE(n.n_nationkey, 8)) "
                    "from nation as n left outer join region as r "
                    "on n.n_nationkey = r.r_regionkey",
                    run=False,
                ),
                case(
                    "TEST_08",
                    "select AVG(COALESCE(n.n_nationkey, r.r_regionkey)), "
                    "MAX(COALESCE(n.n_nationkey, 8)) , "
                    "COUNT(COALESCE(n.n_nationkey, 12)), n.n_nationkey "
                    "from nation as n left outer join region as r "
                    "on n.n_nationkey = r.r_regionkey "
                    "GROUP BY n.n_nationkey",
                    run=False,
                ),
                # WSM: needs to be revisited.
                case(
                    "TEST_09",
                    "select SUM(COALESCE(n2.n_nationkey, 100)), "
                    "COUNT(DISTINCT(COALESCE(n1.n_nationkey,32))), "
                    "COALESCE(n2.n_regionkey, 100) as n1key "
                    "from nation as n1 full outer join nation as n2 "
                    "on n1.n_nationkey = n2.n_nationkey + 6 "
                    "GROUP BY COALESCE(n2.n_regionkey, 100)",
                    run=False,
                ),
            ],
        ),
        (
            "Commom Table Expressions Test",
            True,
            [
                # WSM: needs to be revisited.
                case(
                    "TEST_04",
                    "with ordersTemp as ( "
                    "select min(orders.o_orderkey) as priorityKey, o_custkey "
                    "from orders group by o_custkey ), "
                    "ordersjoin as( select orders.o_orderkey, "
                    "orders.o_custkey/(orders.o_custkey + 1) as o_custkey, "
                    "(ordersTemp.priorityKey + 1) as priorityKey "
                    "from orders inner join ordersTemp "
                    "on ( ordersTemp.priorityKey = orders.o_orderkey) ) "
                    "select (customer.c_custkey + 1)"
                    "/(customer.c_custkey - customer.c_custkey + 1) "
                    "from customer inner join ordersjoin "
                    "on ordersjoin.o_custkey = customer.c_custkey "
                    "where (customer.c_custkey > 1 "
                    "or customer.c_custkey < 100)",
                ),
            ],
        ),
        (
            "Count Distinc Test",
            True,
            [
                # count(distinct(o_orderdate)) was left out of this query.
                case(
                    "TEST_07",
                    "select count(distinct(o_custkey)), "
                    "count(distinct(o_totalprice)), sum(o_orderkey) "
                    "from orders group by o_custkey",
                    run=False,
                ),
                case(
                    "TEST_08",
                    "select COUNT(DISTINCT(n.n_nationkey)), "
                    "AVG(r.r_regionkey) "
                    "from nation as n left outer join region as r "
                    "on n.n_nationkey = r.r_regionkey",
                    run=False,
                ),
                case(
                    "TEST_09",
                    "select MIN(n.n_nationkey), MAX(r.r_regionkey), "
                    "COUNT(DISTINCT(n.n_nationkey + r.r_regionkey)) "
                    "from nation as n left outer join region as r "
                    "on n.n_nationkey = r.r_regionkey",
                    run=False,
                ),
                case(
                    "TEST_10",
                    "select COUNT(DISTINCT(n1.n_nationkey)) as n1key, "
                    "COUNT(DISTINCT(n2.n_nationkey)) as n2key "
                    "from nation as n1 full outer join nation as n2 "
                    "on n1.n_nationkey = n2.n_regionkey",
                    run=False,
                ),
                case(
                    "TEST_11",
                    "select r.r_regionkey, n.n_nationkey, "
                    "COUNT(n.n_nationkey), COUNT(DISTINCT(r.r_regionkey)), "
                    "SUM(DISTINCT(n.n_nationkey + r.r_regionkey)) "
                    "from nation as n left outer join region as r "
                    "on n.n_nationkey = r.r_regionkey "
                    "GROUP BY r.r_regionkey, n.n_nationkey",
                    run=False,
                ),
                case(
                    "TEST_12",
                    "select n1.n_regionkey, n2.n_nationkey, "
                    "MIN(n1.n_regionkey), MAX(n1.n_regionkey), "
                    "AVG(n2.n_nationkey) "
                    "from nation as n1 full outer join nation as n2 "
                    "on n1.n_nationkey = n2.n_nationkey + 6 "
                    "GROUP BY n1.n_regionkey, n2.n_nationkey",
                    run=False,
                ),
            ],
        ),
        ("Count without group by Test", True, nation_count_cases),
        ("Predicates with nulls", True, null_predicate_cases),
        # This section never printed a banner.
        ("Single Node From Local Test", False, nation_count_cases),
        ("Predicates with nulls", True, null_predicate_cases),
        ("Single Node From Local Test", True, customer_summary_cases),
        ("Tables From Pandas Test", True, customer_summary_cases),
        ("Union Test", True, union_cases),
        (
            "Where clause Test",
            True,
            acctbal_filter_cases
            + [
                case(
                    "TEST_09",
                    "select o_orderkey as okey, o_custkey as ckey, "
                    "o_orderdate as odate from orders "
                    "where o_orderstatus = 'O' "
                    "and o_orderpriority = '1-URGENT' order by okey",
                ),
                case(
                    "TEST_10",
                    "select max(o_totalprice) as max_price, "
                    "min(o_orderdate) as min_orderdate from orders "
                    "where o_orderdate = '1998-08-01'",
                ),
                case(
                    "TEST_10",
                    "select max(o_totalprice) as max_price, "
                    "min(o_orderdate) as min_orderdate from orders "
                    "where o_orderdate > '1998-08-01'",
                    print_result=True,
                ),
            ],
        ),
        ("New Queries", True, full_join_count_cases),
        # The banner was printed a second time under the same title.
        ("New Queries", True, customer_summary_cases),
        ("Tables From Pandas Test", True, customer_summary_cases),
        ("Union Test", True, union_cases),
        ("Where clause Test", True, acctbal_filter_cases),
        ("New Queries", True, full_join_count_cases),
        (
            "Concat Test",
            True,
            [
                # Was assigned but never passed to run_query.
                case(
                    "TEST_09",
                    "select o.o_orderkey, "
                    "c.c_name || '-' || (c.c_custkey + 1) , "
                    "o.o_orderstatus from orders o "
                    "inner join customer c on o.o_custkey = c.c_custkey "
                    "where c.c_custkey < 20",
                    run=False,
                ),
                case(
                    "TEST_04",
                    "select c_custkey, SUBSTRING(c_name, 1, 8) "
                    "from customer where c_name between "
                    "'Customer#000000009' and 'Customer#0000000011'",
                ),
            ],
        ),
    ]

    separator = '=============================='

    for fileSchemaType in data_types:
        if skip_test(dask_client, nRals, fileSchemaType, queryType):
            continue

        # Create Tables ------------------------------------------------
        cs.create_tables(bc, dir_data_file, fileSchemaType, tables)

        # Run Query ----------------------------------------------------
        for section, show_banner, cases in sections:
            if show_banner:
                print(separator)
                print(section)
                print(separator)

            for queryId, query, run, options in cases:
                if not run:
                    continue
                runTest.run_query(
                    bc,
                    drill,
                    query,
                    queryId,
                    section,
                    worder,
                    '',
                    acceptable_difference,
                    use_percentage,
                    fileSchemaType,
                    **options
                )
def executionTest():
    """Run the SUBSTRING queries against drill for every data type.

    For every data type that ``skip_test`` does not reject, the tables listed
    below are created and each query is handed to ``runTest.run_query``.  In
    GENERATOR execution mode the loop stops after the first data type that
    was actually processed.

    Relies on names from the enclosing scope: ``bc``, ``drill``,
    ``dask_client``, ``nRals``, ``dir_data_file`` and ``queryType``.
    """
    tables = ["partsupp", "customer", "nation"]
    data_types = [
        DataType.DASK_CUDF,
        DataType.CUDF,
        DataType.CSV,
        DataType.ORC,
        DataType.PARQUET,
    ]  # TODO json

    # (queryId, SQL) pairs, executed in this order for every data type.
    cases = (
        (
            'TEST_01',
            "select SUBSTRING(CAST(ps_partkey as VARCHAR),1,1), ps_availqty "
            "from partsupp "
            "where ps_availqty > 7000 and ps_supplycost > 700 "
            "order by ps_partkey, ps_availqty limit 50",
        ),
        (
            'TEST_02',
            "select c_custkey, c_name from customer "
            "where SUBSTRING(c_name,1,17) = 'Customer#00000000'",
        ),
        (
            'TEST_03',
            "select c_custkey, SUBSTRING(c_name, 1, 8) from customer "
            "where c_name = 'Customer#000000009'",
        ),
        (
            'TEST_04',
            "select * from nation where SUBSTRING(n_name,1,1) = 'I'",
        ),
        (
            'TEST_05',
            "select c_custkey, c_name, SUBSTRING(c_name,1,1), "
            "SUBSTRING(c_name,2,1), SUBSTRING(c_name,1,2), "
            "SUBSTRING(c_name,2,2) from customer "
            "where c_custkey < 20",
        ),
        (
            'TEST_06',
            "select c.c_custkey, SUBSTRING(c.c_name, 10, 18), "
            "CAST(SUBSTRING(c.c_name, 10, 18) as INT), "
            "CAST(SUBSTRING(c.c_name, 10, 18) as INT) + 1 "
            "from customer c where c.c_custkey < 50",
        ),
        (
            'TEST_07',
            "select c.c_custkey, SUBSTRING(c.c_name, 1, 8), "
            "SUBSTRING(c.c_name, 10, 18) || '**' "
            "from customer c where c.c_custkey < 0",
        ),
        (
            'TEST_08',
            "select * from ( "
            "select c.c_custkey, SUBSTRING(c.c_name, 1, 8) as n1, "
            "SUBSTRING(c.c_name, 10, 18) || '**' as n2 "
            "from customer c where c.c_custkey < 50 ) as n "
            "where SUBSTRING(n.n1, 1,7) = 'Customer'",
        ),
    )

    # Whether the result sets must be ordered before they are compared.
    worder = 1
    use_percentage = False
    acceptable_difference = 0.01

    separator = '=============================='

    for fileSchemaType in data_types:
        if skip_test(dask_client, nRals, fileSchemaType, queryType):
            continue

        # Create Tables ------------------------------------------------
        cs.create_tables(bc, dir_data_file, fileSchemaType, tables=tables)

        # Run Query ----------------------------------------------------
        print(separator, queryType, separator, sep="\n")

        for queryId, query in cases:
            runTest.run_query(
                bc,
                drill,
                query,
                queryId,
                queryType,
                worder,
                '',
                acceptable_difference,
                use_percentage,
                fileSchemaType,
            )

        if Settings.execution_mode == ExecutionMode.GENERATOR:
            print(separator)
            break
def executionTest():
    """Run the string-concatenation (``||``) queries against drill.

    For every data type that ``skip_test`` does not reject, the tables listed
    below are created and each query is handed to ``runTest.run_query``.  In
    GENERATOR execution mode the loop stops after the first data type that
    was actually processed.

    Relies on names from the enclosing scope: ``bc``, ``drill``,
    ``dask_client``, ``nRals``, ``dir_data_file`` and ``queryType``.
    """
    tables = [
        "partsupp",
        "lineitem",
        "part",
        "supplier",
        "orders",
        "customer",
        "region",
        "nation",
    ]
    data_types = [
        DataType.DASK_CUDF,
        DataType.CUDF,
        DataType.CSV,
        DataType.ORC,
        DataType.PARQUET,
    ]  # TODO json

    # (queryId, SQL) pairs, executed in this order for every data type.
    cases = (
        (
            "TEST_01",
            "select c_mktsegment || ': ' || c_custkey || ' - ' || c_name "
            "from customer order by c_custkey, c_mktsegment limit 50",
        ),
        (
            "TEST_02",
            "select r.r_name || ' ' || n.n_name from region r "
            "inner join nation n on n.n_regionkey = r.r_regionkey "
            "order by r.r_name",
        ),
        (
            "TEST_03",
            "select c.c_name || ' ' || o.o_orderkey, o.o_orderstatus "
            "from orders o "
            "inner join customer c on o.o_custkey = c.c_custkey "
            "where c.c_custkey < 10",
        ),
        (
            "TEST_04",
            "select c.c_name, o.o_orderkey, o.o_orderstatus "
            "from orders o "
            "inner join customer c on o.o_custkey = c.c_custkey "
            "where 'Customer#000000' || c.c_custkey "
            "like 'Customer#0000001'",
        ),
        (
            "TEST_05",
            "select c_custkey, 'Cliente: ' || c_name from customer "
            "order by c_custkey, c_mktsegment limit 50",
        ),
        (
            "TEST_06",
            "select o.o_orderkey || c.c_name, o.o_orderstatus "
            "from orders o "
            "inner join customer c on o.o_custkey = c.c_custkey "
            "where c.c_custkey < 10",
        ),
        (
            "TEST_07",
            "select o.o_orderkey, "
            "c.c_name || cast(c.c_custkey as VARCHAR), "
            "c.c_name || '-' || cast(c.c_custkey as VARCHAR), "
            "o.o_orderstatus from orders o "
            "inner join customer c on o.o_custkey = c.c_custkey "
            "where c.c_custkey < 10",
        ),
        (
            "TEST_08",
            "select c.c_name || ': ' || c.c_custkey, "
            "c.c_name || ': ' || c.c_comment "
            "from customer c where c.c_custkey < 10",
        ),
        (
            "TEST_09",
            "select o.o_orderkey, c.c_name || '-' || (c.c_custkey + 1), "
            "o.o_orderstatus from orders o "
            "inner join customer c on o.o_custkey = c.c_custkey "
            "where c.c_custkey < 20",
        ),
        (
            "TEST_10",
            "select * from ( "
            "select c.c_custkey, "
            "'Customer#000000' || c.c_custkey as n_name "
            "from customer c where c.c_custkey < 10 ) as n "
            "where n.n_name = 'Customer#000000' || n.c_custkey",
        ),
        (
            "TEST_11",
            "select c.c_custkey, c.c_name || '- ' || c.c_custkey, "
            "c.c_comment from customer c where c.c_custkey < 0",
        ),
    )

    # Whether the result sets must be ordered before they are compared.
    worder = 1
    use_percentage = False
    acceptable_difference = 0.01

    separator = "=============================="

    for fileSchemaType in data_types:
        if skip_test(dask_client, nRals, fileSchemaType, queryType):
            continue

        # Create Tables ------------------------------------------------
        cs.create_tables(bc, dir_data_file, fileSchemaType, tables=tables)

        # Run Query ----------------------------------------------------
        print(separator, queryType, separator, sep="\n")

        for queryId, query in cases:
            runTest.run_query(
                bc,
                drill,
                query,
                queryId,
                queryType,
                worder,
                "",
                acceptable_difference,
                use_percentage,
                fileSchemaType,
            )

        if Settings.execution_mode == ExecutionMode.GENERATOR:
            print(separator)
            break
def executionTest():
    """Run the LIMIT queries for every data type.

    Most queries go to ``drill``.  A case that carries a spark variant is
    sent to ``spark`` (with ``query_spark``) when the data type is ORC and to
    ``drill`` otherwise.  Each case also carries the ordering flag that is
    passed to ``runTest.run_query``.

    Relies on names from the enclosing scope: ``bc``, ``drill``, ``spark``,
    ``dask_client``, ``nRals``, ``dir_data_file`` and ``queryType``.
    """
    tables = ["orders", "customer", "partsupp", "lineitem"]
    data_types = [
        DataType.DASK_CUDF,
        DataType.CUDF,
        DataType.CSV,
        DataType.ORC,
        DataType.PARQUET,
    ]  # TODO json

    worder = 0
    use_percentage = False
    acceptable_difference = 0.01

    # (queryId, SQL, spark SQL or None, ordering flag), in execution order.
    cases = (
        (
            'TEST_01',
            "select o_orderkey from orders order by 1 limit 10",
            None,
            worder,
        ),
        (
            'TEST_02',
            "select o_orderdate, o_orderkey, o_clerk from orders "
            "order by o_orderdate, o_orderkey, o_custkey, o_orderstatus, "
            "o_clerk limit 1000",
            "select o_orderdate, o_orderkey, o_clerk from orders "
            "order by o_orderdate nulls last, o_orderkey nulls last, "
            "o_custkey nulls last, o_orderstatus nulls last, "
            "o_clerk nulls last limit 1000",
            worder,
        ),
        (
            'TEST_03',
            "select o_orderkey from orders "
            "where o_custkey < 300 and o_orderdate >= '1990-08-01' "
            "order by o_orderkey limit 50",
            None,
            worder,
        ),
        (
            'TEST_04',
            "select ps_partkey, ps_availqty from partsupp "
            "where ps_availqty < 3 and ps_availqty >= 1 "
            "order by ps_partkey, ps_availqty limit 50",
            None,
            worder,
        ),
        # Disabled case, kept for reference (never executed):
        #   TEST_05
        #   select o_orderkey, o_orderstatus from orders
        #   where o_custkey < 10 and o_orderstatus = 'O'
        #   order by o_orderkey, o_orderstatus limit 50
        (
            'TEST_06',
            "select orders.o_totalprice, customer.c_name from orders "
            "inner join customer "
            "on orders.o_custkey = customer.c_custkey "
            "order by customer.c_name, orders.o_orderkey limit 10",
            None,
            worder,
        ),
        (
            'TEST_07',
            "(select l_shipdate, l_orderkey, l_linestatus from lineitem "
            "where l_linenumber = 1 "
            "order by 1, 2, 3, l_linenumber limit 10) "
            "union all "
            "(select l_shipdate, l_orderkey, l_linestatus from lineitem "
            "where l_linenumber = 1 "
            "order by 1 desc, 2, 3, l_linenumber limit 10)",
            "(select l_shipdate, l_orderkey, l_linestatus from lineitem "
            "where l_linenumber = 1 "
            "order by 1 nulls last, 2 nulls last, 3 nulls last, "
            "l_linenumber nulls last limit 10) "
            "union all "
            "(select l_shipdate, l_orderkey, l_linestatus from lineitem "
            "where l_linenumber = 1 "
            "order by 1 desc nulls first, 2 nulls last, 3 nulls last, "
            "l_linenumber nulls last limit 10)",
            1,
        ),
        (
            'TEST_08',
            "select c_custkey from customer where c_custkey < 0 "
            "order by c_custkey limit 40",
            None,
            worder,
        ),
        (
            'TEST_09',
            "select c_custkey, c_name from customer where c_custkey < 10 "
            "order by 1 limit 30",
            None,
            worder,
        ),
        (
            'TEST_10',
            "select c_custkey, c_name from customer where c_custkey < 10 "
            "limit 30",
            None,
            1,
        ),
        (
            'TEST_11',
            "select avg(CAST(c_custkey AS DOUBLE)), min(c_custkey) "
            "from customer limit 5",
            None,
            1,
        ),
    )

    separator = '=============================='

    for fileSchemaType in data_types:
        if skip_test(dask_client, nRals, fileSchemaType, queryType):
            continue

        # Create Tables ------------------------------------------------
        cs.create_tables(bc, dir_data_file, fileSchemaType, tables=tables)

        # Run Query ----------------------------------------------------
        print(separator, queryType, separator, sep="\n")

        for queryId, query, query_spark, order_flag in cases:
            # Only cases with a spark variant are routed to spark, and only
            # for ORC; the engine name is resolved lazily, as before.
            if query_spark is not None and fileSchemaType == DataType.ORC:
                runTest.run_query(
                    bc,
                    spark,
                    query,
                    queryId,
                    queryType,
                    order_flag,
                    '',
                    acceptable_difference,
                    use_percentage,
                    fileSchemaType,
                    query_spark=query_spark,
                )
            else:
                runTest.run_query(
                    bc,
                    drill,
                    query,
                    queryId,
                    queryType,
                    order_flag,
                    '',
                    acceptable_difference,
                    use_percentage,
                    fileSchemaType,
                )
def executionTest():
    """Run queries TEST_01..TEST_09 for each configured data source type.

    For every entry of ``data_types`` that ``skip_test`` does not reject,
    the tables are created through ``cs.create_tables`` and every query is
    handed to ``runTest.run_query`` together with the engine object paired
    with it (``spark`` for TEST_08 and TEST_09, ``drill`` for the rest).
    In GENERATOR execution mode the loop stops after the first data source
    type that was actually processed.

    Names that are not defined here (``bc``, ``drill``, ``spark``,
    ``queryType``, ...) are expected to come from the surrounding scope.
    """
    tables = ["customer", "orders", "nation", "region"]
    data_types = [
        DataType.DASK_CUDF,
        DataType.CUDF,
        DataType.CSV,
        DataType.ORC,
        DataType.PARQUET,
    ]  # TODO json

    # Flag telling run_query whether the result sets have to be ordered
    # before they are compared.
    worder = 1
    use_percentage = False
    acceptable_difference = 0.01

    # Queries that are paired with the spark engine instead of drill.
    spark_ids = {"TEST_08", "TEST_09"}

    # (query id, SQL text), executed in this order.
    cases = (
        (
            "TEST_01",
            "select o_orderkey, sum(o_totalprice)/count(o_orderstatus) "
            "from orders where o_custkey < 100 "
            "group by o_orderstatus, o_orderkey",
        ),
        (
            "TEST_02",
            "select o_orderkey, o_orderstatus from orders "
            "where o_custkey < 10 and o_orderstatus <> 'O' "
            "order by o_orderkey, o_orderstatus limit 50",
        ),
        (
            "TEST_03",
            "select count(o_orderstatus) from orders "
            "where o_orderstatus <> 'O'",
        ),
        (
            "TEST_04",
            "select count(o_orderkey), sum(o_orderkey), o_clerk from orders "
            "where o_custkey < 1000 group by o_clerk, o_orderstatus",
        ),
        (
            "TEST_05",
            "select avg(CAST(o_orderkey AS DOUBLE)) from orders "
            "group by o_orderstatus",
        ),
        (
            "TEST_06",
            "select count(o_shippriority), sum(o_totalprice) from orders "
            "group by o_shippriority",
        ),
        (
            "TEST_07",
            "with regionTemp as ( "
            "select r_regionkey, r_name from region where r_regionkey > 2 ), "
            "nationTemp as ( "
            "select n_nationkey, n_regionkey as fkey, n_name from nation "
            "where n_nationkey > 3 order by n_nationkey ) "
            "select regionTemp.r_name, nationTemp.n_name "
            "from regionTemp inner join nationTemp "
            "on regionTemp.r_regionkey = nationTemp.fkey",
        ),
        (
            "TEST_08",
            "select c_custkey, CHAR_LENGTH(c_comment) from customer "
            "where MOD(CHAR_LENGTH(c_comment), 7) = 0",
        ),
        (
            "TEST_09",
            "select sum(CHAR_LENGTH(c_comment)) from customer",
        ),
    )

    for schema_type in data_types:
        if skip_test(dask_client, nRals, schema_type, queryType):
            continue

        # Create Tables
        cs.create_tables(bc, dir_data_file, schema_type, tables=tables)

        # Run Query
        print("==============================")
        print(queryType)
        print("==============================")

        for test_id, sql in cases:
            # Resolve the engine only when the case is about to run.
            engine = spark if test_id in spark_ids else drill
            runTest.run_query(
                bc, engine, sql, test_id, queryType, worder, "",
                acceptable_difference, use_percentage, schema_type,
            )

        if Settings.execution_mode == ExecutionMode.GENERATOR:
            print("==============================")
            break
def executionTest():
    """Run queries TEST_01..TEST_20 against the JSON data source type.

    For every entry of ``data_types`` (only ``DataType.JSON``) that
    ``skip_test`` does not reject, the tables are created through
    ``cs.create_tables`` and every query is handed to
    ``runTest.run_query`` with ``drill`` as the engine argument.  TEST_15
    and TEST_19 additionally pass a column name in the position that is
    an empty string for all other queries.  In GENERATOR execution mode
    the loop stops after the first processed data source type.

    Names that are not defined here (``bc``, ``drill``, ``queryType``,
    ...) are expected to come from the surrounding scope.
    """
    tables = [
        "customer",
        "part",
        "region",
        "nation",
        "orders",
        "supplier",
        "partsupp",
    ]
    data_types = [DataType.JSON]

    # Flag telling run_query whether the result sets have to be ordered
    # before they are compared.
    worder = 1
    use_percentage = False
    acceptable_difference = 0.01

    # (query id, column argument passed after worder, SQL text),
    # executed in this order.
    cases = (
        (
            "TEST_01",
            "",
            "select MIN(n.n_nationkey), MAX(r.r_regionkey), "
            "AVG(n.n_nationkey + r.r_regionkey) "
            "from nation as n left outer join region as r "
            "on n.n_nationkey = r.r_regionkey",
        ),
        (
            "TEST_02",
            "",
            "select SUM(n1.n_nationkey) as n1key, "
            "AVG(n2.n_nationkey + n1.n_nationkey ) as n2key "
            "from nation as n1 full outer join nation as n2 "
            "on n1.n_nationkey = n2.n_nationkey + 10",
        ),
        (
            "TEST_03",
            "",
            "select o_totalprice, o_custkey, "
            "case when o_totalprice > 100000.2 then o_totalprice "
            "else null end "
            "from orders where o_orderkey < 20",
        ),
        (
            "TEST_04",
            "",
            "select cast(o_orderdate AS TIMESTAMP) from orders "
            "where cast(o_orderdate as TIMESTAMP) "
            "between '1995-01-01' and '1995-01-05'",
        ),
        (
            "TEST_05",
            "",
            " WITH t1_l AS ( SELECT * FROM orders ), "
            "t1_r AS ( SELECT * FROM customer ), "
            "main_lr AS( "
            "SELECT COALESCE(o.o_comment, c.c_comment) AS info "
            "FROM t1_l o FULL JOIN t1_r c "
            "ON o.o_custkey = c.c_custkey "
            "AND o.o_orderkey = c.c_nationkey ) "
            "SELECT * FROM main_lr ",
        ),
        (
            "TEST_06",
            "",
            "select o.o_orderkey, c.c_name || cast(c.c_custkey as VARCHAR), "
            "c.c_name || '-' || cast(c.c_custkey as VARCHAR), "
            "o.o_orderstatus "
            "from orders o inner join customer c "
            "on o.o_custkey = c.c_custkey where c.c_custkey < 10",
        ),
        (
            "TEST_07",
            "",
            "select n1.n_regionkey, n2.n_nationkey, "
            "MIN(n1.n_regionkey), MAX(n1.n_regionkey), "
            "AVG(n2.n_nationkey) "
            "from nation as n1 full outer join nation as n2 "
            "on n1.n_nationkey = n2.n_nationkey + 6 "
            "GROUP BY n1.n_regionkey, n2.n_nationkey",
        ),
        (
            "TEST_08",
            "",
            "SELECT c_custkey, count(c_nationkey), min(c_nationkey), "
            "sum(c_nationkey) from customer group by c_custkey",
        ),
        (
            "TEST_09",
            "",
            "select o_orderkey as okey, o_custkey as ckey, "
            "(EXTRACT(YEAR FROM o_orderdate) - 5) from orders "
            "where o_orderstatus = 'O' order by okey",
        ),
        (
            "TEST_10",
            "",
            "select n1.n_nationkey as n1key, n2.n_nationkey as n2key, "
            "n1.n_nationkey + n2.n_nationkey "
            "from nation as n1 full outer join nation as n2 "
            "on n1.n_nationkey = n2.n_nationkey + 6 "
            "where n1.n_nationkey < 10 and n1.n_nationkey > 5",
        ),
        (
            "TEST_11",
            "",
            "select n1.n_nationkey as n1key, n2.n_nationkey as n2key, "
            "n1.n_nationkey + n2.n_nationkey "
            "from nation as n1 full outer join nation as n2 "
            "on n1.n_nationkey = n2.n_nationkey + 6 "
            "and n1.n_nationkey + 1 = n2.n_nationkey + 7 "
            "and n1.n_nationkey + 2 = n2.n_nationkey + 8",
        ),
        (
            "TEST_12",
            "",
            "select count(c_custkey) + sum(c_acctbal) + avg(c_acctbal), "
            "min(c_custkey) - max(c_nationkey), c_nationkey * 2 as key "
            "from customer where c_nationkey * 2 < 40 "
            "group by c_nationkey * 2",
        ),
        (
            "TEST_13",
            "",
            "select c.c_custkey, r.r_regionkey, "
            "c.c_custkey + r.r_regionkey as addy "
            "from customer as c inner join region as r "
            "on c.c_nationkey = r.r_regionkey "
            "where c.c_acctbal < 1000 "
            "group by r.r_regionkey, c.c_custkey "
            "order by c.c_custkey desc",
        ),
        (
            "TEST_14",
            "",
            "SELECT n.n_nationkey + 1, n.n_regionkey "
            "from nation AS n INNER JOIN region AS r "
            "ON n.n_regionkey = r.r_regionkey and n.n_nationkey = 5",
        ),
        (
            "TEST_15",
            "n_nationkey",
            "select n.n_nationkey, r.r_regionkey "
            "from nation as n left outer join region as r "
            "on n.n_regionkey = r.r_regionkey "
            "where n.n_nationkey < 10 and n.n_nationkey > 5",
        ),
        (
            "TEST_16",
            "",
            "select partSuppTemp.partKey, partAnalysis.avgSize "
            "from ( select min(p_partkey) as partKey, "
            "avg(CAST(p_size AS DOUBLE)) as avgSize, "
            "max(p_retailprice) as maxPrice, "
            "min(p_retailprice) as minPrice from part ) as partAnalysis "
            "inner join ( select ps_partkey as partKey, "
            "ps_suppkey as suppKey from partsupp "
            "where ps_availqty > 2 ) as partSuppTemp "
            "on partAnalysis.partKey = partSuppTemp.partKey "
            "inner join ( select s_suppkey as suppKey from supplier ) "
            "as supplierTemp "
            "on supplierTemp.suppKey = partSuppTemp.suppKey",
        ),
        (
            "TEST_17",
            "",
            " select p.p_brand, p.p_type, p.p_size, "
            "count(ps.ps_suppkey) as supplier_cnt "
            "from partsupp ps inner join part p "
            "on p.p_partkey = ps.ps_partkey "
            "where p.p_brand <> 'Brand#45' "
            "and p.p_size in (49, 14, 23, 45, 19, 3, 36, 9) "
            "and ps.ps_supplycost < p.p_retailprice "
            "group by p.p_brand, p.p_type, p.p_size "
            "order by supplier_cnt desc, p.p_brand, p.p_type, p.p_size",
        ),
        (
            "TEST_18",
            "",
            "select c_custkey + c_nationkey, c_acctbal from customer "
            "order by 1 desc, 2",
        ),
        (
            "TEST_19",
            "o_orderkey",
            "(select o_orderkey, o_custkey from orders "
            "where o_orderkey < 100 ) "
            "union all "
            "(select o_orderkey, o_custkey from orders "
            "where o_orderkey < 300 and o_orderkey >= 200) "
            "order by 2",
        ),
        (
            "TEST_20",
            "",
            "select avg(CAST(c_custkey AS DOUBLE)), min(c_custkey) "
            "from customer limit 5",
        ),
    )

    for schema_type in data_types:
        if skip_test(dask_client, nRals, schema_type, queryType):
            continue

        # Create Tables
        cs.create_tables(bc, dir_data_file, schema_type, tables=tables)

        # Run Query
        print("==============================")
        print(queryType)
        print("==============================")

        for test_id, order_col, sql in cases:
            runTest.run_query(
                bc, drill, sql, test_id, queryType, worder, order_col,
                acceptable_difference, use_percentage, schema_type,
            )

        if Settings.execution_mode == ExecutionMode.GENERATOR:
            print("==============================")
            break
def executionTest(queryType):
    """Run the COALESCE queries TEST_01..TEST_18 for each data source type.

    For every entry of ``data_types`` that ``skip_test`` does not reject,
    the tables are created through ``cs.create_tables`` and every query is
    handed to ``runTest.run_query`` with ``drill`` as the engine argument.
    In GENERATOR execution mode the loop stops after the first data source
    type that was actually processed.

    Args:
        queryType: value forwarded to ``skip_test`` and
            ``runTest.run_query`` and printed as the suite banner.

    Names that are not defined here (``bc``, ``drill``, ``dask_client``,
    ...) are expected to come from the surrounding scope.
    """
    tables = ["nation", "region", "customer", "orders", "lineitem"]  # TODO json
    data_types = [
        DataType.DASK_CUDF,
        DataType.CUDF,
        DataType.CSV,
        DataType.ORC,
        DataType.PARQUET,
    ]  # TODO json

    # Flag telling run_query whether the result sets have to be ordered
    # before they are compared.
    worder = 1
    use_percentage = False
    acceptable_difference = 0.01

    # (query id, SQL text), executed in this order.
    cases = (
        (
            "TEST_01",
            "select n.n_nationkey, COALESCE(r.r_regionkey,-1) "
            "from nation as n left outer join region as r "
            "on n.n_nationkey = r.r_regionkey where n.n_nationkey < 10",
        ),
        (
            "TEST_02",
            "select COALESCE(orders.o_orderkey, 100), "
            "COALESCE(orders.o_totalprice, 0.01) "
            "from customer left outer join orders "
            "on customer.c_custkey = orders.o_custkey "
            "where customer.c_nationkey = 3 and customer.c_custkey < 500",
        ),
        (
            "TEST_03",
            "select COALESCE(orders.o_orderkey, customer.c_custkey), "
            "COALESCE(orders.o_totalprice, customer.c_acctbal) "
            "from customer left outer join orders "
            "on customer.c_custkey = orders.o_custkey "
            "where customer.c_nationkey = 3 and customer.c_custkey < 500",
        ),
        (
            "TEST_04",
            "select customer.c_custkey, orders.o_orderkey, "
            "COALESCE(orders.o_custkey,123456) "
            "from customer left outer join orders "
            "on customer.c_custkey = orders.o_custkey "
            "where customer.c_nationkey = 3 and customer.c_custkey < 500",
        ),
        (
            "TEST_05",
            "select COUNT(DISTINCT(COALESCE(n1.n_regionkey, 32))), "
            "AVG(CAST(COALESCE(n1.n_regionkey, 32) as float)) "
            "from nation as n1 full outer join nation as n2 "
            "on n1.n_nationkey = n2.n_nationkey + 6",
        ),
        (
            "TEST_06",
            "select SUM(COALESCE(n2.n_nationkey, 100)), "
            "COUNT(DISTINCT(COALESCE(n1.n_nationkey,32))), "
            "n2.n_regionkey as n1key "
            "from nation as n1 full outer join nation as n2 "
            "on n1.n_nationkey = n2.n_nationkey + 6 "
            "GROUP BY n2.n_regionkey",
        ),
        (
            "TEST_07",
            "select MIN(COALESCE(n.n_nationkey, r.r_regionkey)), "
            "MAX(COALESCE(n.n_nationkey, 8)) "
            "from nation as n left outer join region as r "
            "on n.n_nationkey = r.r_regionkey",
        ),
        (
            "TEST_08",
            "select AVG(CAST(COALESCE(n.n_nationkey, r.r_regionkey) "
            "AS DOUBLE)), "
            "MAX(COALESCE(n.n_nationkey, 8)), "
            "COUNT(COALESCE(n.n_nationkey, 12)), n.n_nationkey "
            "from nation as n left outer join region as r "
            "on n.n_nationkey = r.r_regionkey GROUP BY n.n_nationkey",
        ),
        (
            "TEST_09",
            "select SUM(COALESCE(n2.n_nationkey, 100)), "
            "COUNT(DISTINCT(COALESCE(n1.n_nationkey,32))), "
            "COALESCE(n2.n_regionkey, 100) as n1key "
            "from nation as n1 full outer join nation as n2 "
            "on n1.n_nationkey = n2.n_nationkey + 6 "
            "GROUP BY COALESCE(n2.n_regionkey, 100)",
        ),
        (
            "TEST_10",
            "SELECT COALESCE(l_shipinstruct, l_comment) FROM lineitem",
        ),
        (
            "TEST_11",
            "select n.n_nationkey, COALESCE(r.r_comment, n.n_comment) "
            "from nation as n left outer join region as r "
            "on n.n_nationkey = r.r_regionkey",
        ),
        (
            "TEST_12",
            "SELECT COALESCE(l.l_shipinstruct, o.o_orderstatus) "
            "FROM lineitem l inner join orders o "
            "on l.l_orderkey = o.o_orderkey "
            "where o.o_totalprice < 1574.23",
        ),
        (
            "TEST_13",
            " WITH t1_l AS ( SELECT * FROM orders ), "
            "t1_r AS ( SELECT * FROM customer ), "
            "main_lr AS( "
            "SELECT COALESCE(o.o_comment, c.c_comment) AS info "
            "FROM t1_l o FULL JOIN t1_r c "
            "ON o.o_custkey = c.c_custkey "
            "AND o.o_orderkey = c.c_nationkey ) "
            "SELECT * FROM main_lr ",
        ),
        (
            "TEST_14",
            " WITH ltable3 AS ( "
            "select lineitem.l_orderkey as orderkey, "
            "lineitem.l_linestatus as linestatus "
            "from lineitem where mod(lineitem.l_orderkey, 2) = 0 ), "
            "rtable1 AS ( "
            "select lineitem.l_orderkey as orderkey, "
            "lineitem.l_linestatus as linestatus "
            "from lineitem where mod(lineitem.l_partkey, 6) = 0 ), "
            "rtable2 AS ( "
            "select lineitem.l_orderkey as orderkey, "
            "lineitem.l_linestatus as linestatus "
            "from lineitem where mod(lineitem.l_suppkey, 4) = 0 ), "
            "rtable3 AS ( "
            "select coalesce(l.orderkey, r.orderkey) as orderkey, "
            "coalesce(l.linestatus, r.linestatus) as linestatus "
            "from rtable1 l full join rtable2 r "
            "on l.orderkey = r.orderkey\n"
            # A SQL "--" comment runs to the end of the line, so it must
            # be newline-terminated; otherwise it would comment out the
            # lastjoin CTE and the final select that follow it.
            "-- and l.linestatus = r.linestatus\n"
            "), lastjoin AS ( "
            "select l.orderkey, "
            "coalesce(l.linestatus, r.linestatus) as linestatus "
            "from ltable3 l full join rtable3 r "
            "on l.orderkey = r.orderkey "
            "and l.linestatus = r.linestatus ) "
            "select * from lastjoin ",
        ),
        (
            "TEST_15",
            "select n.n_nationkey, COALESCE(r.r_regionkey,-1) "
            "from nation as n right outer join region as r "
            "on n.n_nationkey = r.r_regionkey where n.n_nationkey < 10",
        ),
        (
            "TEST_16",
            "select COALESCE(orders.o_orderkey, 100), "
            "COALESCE(orders.o_totalprice, 0.01) "
            "from customer right outer join orders "
            "on customer.c_custkey = orders.o_custkey "
            "where customer.c_nationkey = 3 and customer.c_custkey < 500",
        ),
        (
            "TEST_17",
            "select MIN(COALESCE(n.n_nationkey, r.r_regionkey)), "
            "MAX(COALESCE(n.n_nationkey, 8)) "
            "from nation as n right outer join region as r "
            "on n.n_nationkey = r.r_regionkey",
        ),
        (
            "TEST_18",
            "select AVG(CAST(COALESCE(n.n_nationkey, r.r_regionkey) "
            "AS DOUBLE)), "
            "MAX(COALESCE(n.n_nationkey, 8)), "
            "COUNT(COALESCE(n.n_nationkey, 12)), n.n_nationkey "
            "from nation as n right outer join region as r "
            "on n.n_nationkey = r.r_regionkey GROUP BY n.n_nationkey",
        ),
    )

    for schema_type in data_types:
        if skip_test(dask_client, nRals, schema_type, queryType):
            continue

        # Create Tables
        cs.create_tables(bc, dir_data_file, schema_type, tables=tables)

        # Run Query
        print("==============================")
        print(queryType)
        print("==============================")

        for test_id, sql in cases:
            runTest.run_query(
                bc, drill, sql, test_id, queryType, worder, "",
                acceptable_difference, use_percentage, schema_type,
            )

        if Settings.execution_mode == ExecutionMode.GENERATOR:
            print("==============================")
            break
def executionTest():
    """Run the DAYOFWEEK queries TEST_01..TEST_06 for each data source type.

    For every entry of ``data_types`` that ``skip_test`` does not reject,
    the tables are created through ``cs.create_tables`` and every query is
    handed to ``runTest.run_query`` with ``spark`` as the engine argument.
    In GENERATOR execution mode the loop stops after the first data source
    type that was actually processed.

    Names that are not defined here (``bc``, ``spark``, ``queryType``,
    ...) are expected to come from the surrounding scope.
    """
    tables = ["lineitem", "orders", "nation", "region"]
    data_types = [
        DataType.DASK_CUDF,
        DataType.CUDF,
        DataType.CSV,
        DataType.PARQUET,
    ]  # TODO json

    # Flag telling run_query whether the result sets have to be ordered
    # before they are compared.
    worder = 1
    use_percentage = False
    acceptable_difference = 0.01

    # (query id, SQL text), executed in this order.
    cases = (
        (
            "TEST_01",
            "select o_orderkey, DAYOFWEEK(o_orderdate) as day_of_week "
            "from orders where o_orderkey < 250 order by o_orderkey",
        ),
        (
            "TEST_02",
            "select o_orderkey, o_totalprice, "
            "DAYOFWEEK(o_orderdate) as day_of_week "
            "from orders where o_orderkey < 1850 "
            "and DAYOFWEEK(o_orderdate) = 6 order by o_orderkey",
        ),
        (
            "TEST_03",
            "select o_orderkey, "
            "case when DAYOFWEEK(o_orderdate) = 6 "
            "OR DAYOFWEEK(o_orderdate) = 7 then 'Weekend' "
            "else 'Weekday' end as day_of_week "
            "from orders where o_orderkey > 5450 order by o_orderkey",
        ),
        (
            "TEST_04",
            " with dayofweektable as ( "
            "select o_orderkey, DAYOFWEEK(o_orderdate) as num_of_week "
            "from orders ) "
            "select o_orderkey, num_of_week, "
            "case when num_of_week = 1 then 'Mon' "
            "when num_of_week = 2 then 'Tue' "
            "when num_of_week = 3 then 'Wed' "
            "when num_of_week = 4 then 'Thu' "
            "when num_of_week = 5 then 'Fri' "
            "when num_of_week = 6 then 'Sat' "
            "else 'Sun' end as day_of_week "
            "from dayofweektable order by o_orderkey limit 100",
        ),
        (
            "TEST_05",
            "with ordersdaystable as ( "
            "select o_orderkey as key, "
            "DAYOFWEEK(o_orderdate) as num_of_week from orders ), "
            "lineitemdaystable as ( "
            "select l_orderkey as key, "
            "DAYOFWEEK(l_shipdate) as num_of_week from lineitem ) "
            "select 'Saturday' as day_, count(o.num_of_week) as n_days "
            "from ordersdaystable as o "
            "inner join lineitemdaystable as l ON o.key = l.key "
            "where l.num_of_week = 6 ",
        ),
        (
            "TEST_06",
            "with ordersperutable as ( "
            "select o_orderkey, DAYOFWEEK(o_orderdate) as num_of_week, "
            "n_name as country "
            "from orders inner join nation "
            "on DAYOFWEEK(o_orderdate) = n_nationkey "
            "where n_name in "
            "('PERU', 'ARGENTINA', 'BRAZIL', 'UNITED STATES') ), "
            "lineitemamericatable as ( "
            "select l_orderkey, DAYOFWEEK(l_shipdate) as num_of_week, "
            "r_name as region "
            "from lineitem inner join region "
            "on DAYOFWEEK(l_shipdate) = r_regionkey "
            "where r_name = 'AMERICA' ) "
            "select o_orderkey, o.num_of_week as num_day_o, "
            "case when o.num_of_week = 1 then 'Mon' "
            "when o.num_of_week = 2 then 'Tue' "
            "when o.num_of_week = 3 then 'Wed' "
            "when o.num_of_week = 4 then 'Thu' "
            "when o.num_of_week = 5 then 'Fri' "
            "when o.num_of_week = 6 then 'Sat' "
            "else 'Sun' end as day_of_week "
            "from ordersperutable as o "
            "inner join lineitemamericatable as l "
            "ON o_orderkey = l_orderkey "
            "where o.num_of_week <> 7 and l.num_of_week <> 7 "
            "order by o_orderkey, o.num_of_week limit 75",
        ),
    )

    for schema_type in data_types:
        if skip_test(dask_client, nRals, schema_type, queryType):
            continue

        cs.create_tables(bc, dir_data_file, schema_type, tables=tables)

        # Run Query
        print("==============================")
        print(queryType)
        print("==============================")

        for test_id, sql in cases:
            runTest.run_query(
                bc, spark, sql, test_id, queryType, worder, "",
                acceptable_difference, use_percentage, schema_type,
            )

        if Settings.execution_mode == ExecutionMode.GENERATOR:
            print("==============================")
            break
def executionTest():
    """Run the math/unary queries TEST_01..TEST_08 per data source type.

    For every entry of ``data_types`` that ``skip_test`` does not reject,
    the tables are created through ``cs.create_tables`` and every query is
    handed to ``runTest.run_query`` with ``drill`` as the engine argument.
    In GENERATOR execution mode the loop stops after the first data source
    type that was actually processed.

    Names that are not defined here (``bc``, ``drill``, ``queryType``,
    ...) are expected to come from the surrounding scope.
    """
    tables = ["customer", "nation"]
    data_types = [
        DataType.DASK_CUDF,
        DataType.CUDF,
        DataType.CSV,
        DataType.ORC,
        DataType.PARQUET,
    ]  # TODO json

    # Flag telling run_query whether the result sets have to be ordered
    # before they are compared.
    worder = 1
    use_percentage = False
    acceptable_difference = 0.01

    # (query id, SQL text), executed in this order.
    cases = (
        (
            "TEST_01",
            "select sin(c_acctbal), cos(c_acctbal), asin(c_acctbal), "
            "acos(c_acctbal), ln(c_acctbal), tan(c_acctbal), "
            "atan(c_acctbal), floor(c_acctbal), ceil(c_acctbal), "
            "c_acctbal from customer",
        ),
        (
            "TEST_02",
            "select sin(c_acctbal), cos(c_acctbal), asin(c_acctbal), "
            "acos(c_acctbal), c_acctbal from customer",
        ),
        (
            "TEST_03",
            "select sin(c_acctbal), cos(c_acctbal), asin(c_acctbal), "
            "acos(c_acctbal), ln(c_acctbal), c_acctbal from customer",
        ),
        (
            "TEST_04",
            "select sin(c_acctbal), cos(c_acctbal), asin(c_acctbal), "
            "acos(c_acctbal), ln(c_acctbal), tan(c_acctbal), "
            "atan(c_acctbal), c_acctbal from customer",
        ),
        (
            "TEST_05",
            "select sin(c_acctbal), cos(c_acctbal), asin(c_acctbal), "
            "acos(c_acctbal), ln(c_acctbal), tan(c_acctbal), "
            "atan(c_acctbal), floor(c_acctbal), c_acctbal from customer",
        ),
        (
            "TEST_06",
            "select floor(c_acctbal), c_acctbal from customer",
        ),
        # TEST_07 and TEST_08: not counted as unaryOp (-) queries, but they
        # need to be considered as well.
        (
            "TEST_07",
            "select n_nationkey, -n_nationkey from nation",
        ),
        (
            "TEST_08",
            "select -(cast(n_nationkey as double)) from nation",
        ),
    )

    for schema_type in data_types:
        if skip_test(dask_client, nRals, schema_type, queryType):
            continue

        # Create Tables
        cs.create_tables(bc, dir_data_file, schema_type, tables=tables)

        # Run Query
        print("==============================")
        print(queryType)
        print("==============================")

        for test_id, sql in cases:
            runTest.run_query(
                bc, drill, sql, test_id, queryType, worder, "",
                acceptable_difference, use_percentage, schema_type,
            )

        if Settings.execution_mode == ExecutionMode.GENERATOR:
            print("==============================")
            break
def executionTest():
    """Run the right-outer-join queries TEST_01..TEST_05 per data source.

    For every entry of ``data_types`` that ``skip_test`` does not reject,
    the tables are created through ``cs.create_tables`` and every query is
    handed to ``runTest.run_query`` with ``drill`` as the engine argument.
    TEST_01..TEST_04 pass ``"n_nationkey"`` as the column argument and an
    acceptable difference of 0; TEST_05 passes an empty column argument
    and an acceptable difference of 0.01.  In GENERATOR execution mode the
    loop stops after the first processed data source type.

    Names that are not defined here (``bc``, ``drill``, ``queryType``,
    ...) are expected to come from the surrounding scope.
    """
    tables = ["nation", "region", "orders", "lineitem"]
    data_types = [
        DataType.DASK_CUDF,
        DataType.CUDF,
        DataType.CSV,
        DataType.ORC,
        DataType.PARQUET,
    ]  # TODO json

    # Flag telling run_query whether the result sets have to be ordered
    # before they are compared.
    worder = 1
    use_percentage = False
    acceptable_difference = 0

    # (query id, column argument, acceptable difference, SQL text),
    # executed in this order.
    cases = (
        (
            "TEST_01",
            "n_nationkey",
            acceptable_difference,
            "select n.n_nationkey, r.r_regionkey "
            "from nation as n right outer join region as r "
            "on n.n_nationkey = r.r_regionkey where n.n_nationkey < 10",
        ),
        (
            "TEST_02",
            "n_nationkey",
            acceptable_difference,
            "select n.n_nationkey, r.r_regionkey, "
            "n.n_nationkey + r.r_regionkey "
            "from nation as n right outer join region as r "
            "on n.n_nationkey = r.r_regionkey where n.n_nationkey < 10",
        ),
        (
            "TEST_03",
            "n_nationkey",
            acceptable_difference,
            "select n.n_nationkey, r.r_regionkey "
            "from nation as n right outer join region as r "
            "on n.n_regionkey = r.r_regionkey where n.n_nationkey < 10",
        ),
        (
            "TEST_04",
            "n_nationkey",
            acceptable_difference,
            "select n.n_nationkey, r.r_regionkey "
            "from nation as n right outer join region as r "
            "on n.n_regionkey = r.r_regionkey "
            "where n.n_nationkey < 10 and n.n_nationkey > 5",
        ),
        (
            "TEST_05",
            "",
            0.01,
            "select l.l_orderkey, l.l_partkey, l.l_quantity, "
            "o.o_totalprice, o.o_clerk "
            "from lineitem as l right outer join orders as o "
            "on l.l_orderkey = o.o_orderkey "
            "where o.o_totalprice < 87523.2 "
            "and l.l_returnflag in ('A', 'R') "
            "order by o.o_totalprice",
        ),
    )

    for schema_type in data_types:
        if skip_test(dask_client, nRals, schema_type, queryType):
            continue

        # Create Tables
        cs.create_tables(bc, dir_data_file, schema_type, tables=tables)

        # Run Query
        print("==============================")
        print(queryType)
        print("==============================")

        for test_id, order_col, tolerance, sql in cases:
            runTest.run_query(
                bc, drill, sql, test_id, queryType, worder, order_col,
                tolerance, use_percentage, schema_type,
            )

        if Settings.execution_mode == ExecutionMode.GENERATOR:
            print("==============================")
            break
def executionTest():
    """Run the IS NULL / IS NOT NULL queries for each data source type.

    For every entry of ``data_types`` that ``skip_test`` does not reject,
    the tables are created through ``cs.create_tables`` and the queries
    TEST_01..TEST_05, TEST_07 and TEST_08 are handed to
    ``runTest.run_query`` with ``drill`` as the engine argument.  TEST_02
    and TEST_04 use an acceptable difference of 0.01 instead of 0, and
    TEST_01 and TEST_05 additionally pass ``print_result=True``.  In
    GENERATOR execution mode the loop stops after the first processed
    data source type.

    Names that are not defined here (``bc``, ``drill``, ``queryType``,
    ...) are expected to come from the surrounding scope.
    """
    tables = [
        "nation",
        "region",
        "customer",
        "lineitem",
        "orders",
        "supplier",
    ]
    data_types = [
        DataType.DASK_CUDF,
        DataType.CUDF,
        DataType.CSV,
        DataType.ORC,
        DataType.PARQUET,
    ]  # TODO json

    # Flag telling run_query whether the result sets have to be ordered
    # before they are compared.
    worder = 1
    use_percentage = False
    acceptable_difference = 0

    # (query id, acceptable difference, extra keyword arguments, SQL text),
    # executed in this order.  The keyword dict is empty when the call
    # passes no print_result argument at all.
    cases = (
        (
            "TEST_01",
            acceptable_difference,
            {"print_result": True},
            "select MIN(n.n_nationkey), MAX(r.r_regionkey), "
            "AVG(CAST((n.n_nationkey + r.r_regionkey) AS DOUBLE)) "
            "from nation as n left outer join region as r "
            "on n.n_nationkey = r.r_regionkey "
            "where n.n_nationkey IS NULL",
        ),
        (
            "TEST_02",
            0.01,
            {},
            "select SUM(n1.n_nationkey) as n1key, "
            "AVG(CAST((n2.n_nationkey + n1.n_nationkey) AS DOUBLE)) "
            "as n2key "
            "from nation as n1 full outer join nation as n2 "
            "on n1.n_nationkey = n2.n_nationkey + 10 "
            "where n1.n_nationkey IS NOT NULL",
        ),
        (
            "TEST_03",
            acceptable_difference,
            {},
            "select COUNT(n1.n_nationkey) as n1key, "
            "COUNT(n2.n_nationkey + n1.n_nationkey) as n2key "
            "from nation as n1 full outer join nation as n2 "
            "on n1.n_nationkey = n2.n_nationkey + 10 "
            "where n1.n_nationkey IS NOT NULL",
        ),
        (
            "TEST_04",
            0.01,
            {},
            "select COUNT(n1.n_regionkey), "
            "AVG(CAST(n1.n_regionkey AS DOUBLE)) "
            "from nation as n1 full outer join nation as n2 "
            "on n1.n_nationkey = n2.n_nationkey + 6 "
            "WHERE n1.n_regionkey IS NOT NULL",
        ),
        (
            "TEST_05",
            acceptable_difference,
            {"print_result": True},
            "select MIN(n.n_nationkey), MAX(n.n_nationkey) "
            "from nation as n left outer join region as r "
            "on n.n_nationkey = r.r_regionkey "
            "WHERE n.n_nationkey IS NULL",
        ),
        # TEST_06 is disabled; its query is kept here for reference:
        #   select COUNT(n.n_nationkey), AVG(r.r_regionkey)
        #   from nation as n left outer join region as r
        #   on n.n_nationkey = r.r_regionkey
        #   WHERE n.n_regionkey IS NULL
        (
            "TEST_07",
            acceptable_difference,
            {},
            "select n.n_nationkey, n.n_name, r.r_regionkey, r.r_name "
            "from nation as n left outer join region as r "
            "on n.n_nationkey = r.r_regionkey "
            "WHERE r.r_name IS NULL",
        ),
        (
            "TEST_08",
            acceptable_difference,
            {},
            "select n.n_nationkey, n.n_name, r.r_regionkey, r.r_name "
            "from nation as n left outer join region as r "
            "on n.n_nationkey = r.r_regionkey "
            "WHERE n.n_name IS NOT NULL",
        ),
    )

    for schema_type in data_types:
        if skip_test(dask_client, nRals, schema_type, queryType):
            continue

        # Create Tables
        cs.create_tables(bc, dir_data_file, schema_type, tables=tables)

        # Run Query
        print("==============================")
        print(queryType)
        print("==============================")

        for test_id, tolerance, extra, sql in cases:
            runTest.run_query(
                bc, drill, sql, test_id, queryType, worder, "",
                tolerance, use_percentage, schema_type, **extra
            )

        if Settings.execution_mode == ExecutionMode.GENERATOR:
            print("==============================")
            break
def executionTest():
    """Run the date/EXTRACT queries for every configured data source.

    For each entry of ``data_types`` that ``skip_test`` does not reject, the
    listed tables are created and every case below is passed to
    ``runTest.run_query``.  Cases flagged as ORC-sensitive use ``spark`` as
    the engine argument when the data source is ``DataType.ORC`` and
    ``drill`` otherwise; all remaining cases always use ``drill``.
    """
    tables = ["nation", "region", "customer", "orders", "lineitem"]
    data_types = [
        DataType.DASK_CUDF,
        DataType.CUDF,
        DataType.CSV,
        DataType.ORC,
        DataType.PARQUET,
    ]  # TODO json

    # One row per query:
    #   (query id, SQL text, switch to spark when the source is ORC)
    cases = (
        (
            'TEST_01',
            "select EXTRACT(YEAR FROM l_receiptdate) - "
            "EXTRACT(YEAR FROM l_shipdate) as years_late, "
            "EXTRACT(MONTH FROM l_receiptdate) - "
            "EXTRACT(MONTH FROM l_shipdate) as months_late, "
            "EXTRACT(DAY FROM l_receiptdate) - "
            "EXTRACT(DAY FROM l_shipdate) as days_late "
            "from lineitem where l_shipdate < DATE '1993-01-01'",
            True,
        ),
        (
            'TEST_02',
            "select o_orderkey as okey, o_custkey as ckey, "
            "(EXTRACT(YEAR FROM o_orderdate) - 5) "
            "from orders where o_orderstatus = 'O' order by okey",
            False,
        ),
        (
            'TEST_03',
            "select orders.o_orderkey, orders.o_orderdate, "
            "orders.o_orderstatus "
            "from orders inner join lineitem "
            "on lineitem.l_orderkey = orders.o_orderkey "
            "where orders.o_orderkey < 30 and lineitem.l_orderkey < 20 "
            "order by orders.o_orderkey, lineitem.l_linenumber, "
            "orders.o_custkey, lineitem.l_orderkey",
            True,
        ),
        (
            'TEST_04',
            "select customer.c_nationkey, customer.c_name, "
            "orders.o_orderdate, lineitem.l_receiptdate "
            "from customer left outer join orders "
            "on customer.c_custkey = orders.o_custkey "
            "inner join lineitem on lineitem.l_orderkey = orders.o_orderkey "
            "where customer.c_nationkey = 3 and customer.c_custkey < 100 "
            "and orders.o_orderdate < '1990-01-01' "
            "order by orders.o_orderkey, lineitem.l_linenumber",
            False,
        ),
        (
            'TEST_05',
            "select orders.o_orderkey, orders.o_orderdate, "
            "lineitem.l_receiptdate, orders.o_orderstatus "
            "from orders inner join lineitem "
            "on lineitem.l_receiptdate = orders.o_orderdate "
            "where orders.o_orderkey < 30 and lineitem.l_orderkey < 20 "
            "order by orders.o_orderkey, lineitem.l_linenumber",
            False,
        ),
    )

    banner = '=============================='

    for fileSchemaType in data_types:
        if skip_test(dask_client, nRals, fileSchemaType, queryType):
            continue
        cs.create_tables(bc, dir_data_file, fileSchemaType, tables=tables)

        # Flag telling run_query whether the result sets need to be ordered
        # before they are compared.
        sort_results = 1
        as_percentage = False
        tolerance = 0.01

        print(banner)
        print(queryType)
        print(banner)

        for test_id, sql, spark_for_orc in cases:
            # The engine is chosen lazily so that `spark` is only looked up
            # for ORC-sensitive cases running against an ORC source.
            runTest.run_query(
                bc,
                (
                    spark
                    if spark_for_orc and fileSchemaType == DataType.ORC
                    else drill
                ),
                sql,
                test_id,
                queryType,
                sort_results,
                '',
                tolerance,
                as_percentage,
                fileSchemaType,
            )
def executionTest():
    """Register an HDFS filesystem and run the query suite against it.

    The function first calls ``bc.hdfs`` with a Kerberos ticket path.  If the
    returned status is falsy, a warning with the reported error is printed
    and the whole suite is skipped (the function returns ``None`` early).
    Otherwise, for every entry of ``data_types`` that ``skip_test`` does not
    reject, the tables are created from the ``hdfs://`` location and each
    case in the table below is passed to ``runTest.run_query`` with ``drill``
    as the engine argument.

    NOTE(review): the ``user`` value passed to ``bc.hdfs`` looks redacted in
    this file -- confirm the real account before running.
    """
    # Read Data TPCH -------------------------------------------------------
    authority = 'hdfsdisk'
    ktoken = '../KrbHDFS/myconf/krb5cc_0'
    krbticket = os.path.abspath(ktoken)
    hdfs_host = '172.22.0.3'
    hdfs_port = 9000
    hdfs_driver = 'libhdfs'
    print("Using krb ticket: " + krbticket)
    result, error_msg, fs = bc.hdfs(
        authority,
        host=hdfs_host,
        port=hdfs_port,
        user='******',
        driver=hdfs_driver,
        kerb_ticket=krbticket,
    )

    # Truthiness test instead of `== False`: a falsy status that is not the
    # literal False (e.g. None) must not be reported as a successful
    # connection.
    if not result:
        msg = (
            "WARNING: Could not connect to HDFS instance %s:%d using "
            "driver %s, error was: %s"
            % (hdfs_host, hdfs_port, hdfs_driver, error_msg)
        )
        print(msg)
        print("WARNING: Will ignore " + queryType)
        return

    print("Success connection to HDFS:")
    print(fs)

    hdfs_dir_data_lc = "hdfs://" + authority + dir_data_lc
    print("TPCH files at: " + hdfs_dir_data_lc)

    tables = [
        'nation',
        'region',
        'supplier',
        'customer',
        'lineitem',
        'orders',
    ]
    data_types = [DataType.CSV, DataType.ORC, DataType.PARQUET]  # TODO json

    # Flag telling run_query whether the result sets need to be ordered
    # before they are compared; the cases with an explicit ORDER BY pass 0.
    worder = 1
    use_percentage = False
    acceptable_difference = 0.01

    # One row per query:
    #   (query id, SQL text, order flag, use_percentage flag)
    cases = (
        (
            'TEST_01',
            "select count(c_custkey) as c1, count(c_acctbal) as c2 "
            "from customer",
            worder,
            use_percentage,
        ),
        (
            'TEST_02',
            "select count(n_nationkey), count(n_regionkey) from nation",
            worder,
            use_percentage,
        ),
        (
            'TEST_03',
            "select count(s_suppkey), count(s_nationkey) from supplier",
            worder,
            use_percentage,
        ),
        (
            # TODO: Change sum/count for avg KC
            'TEST_04',
            "select count(c_custkey), sum(c_acctbal), "
            "sum(c_acctbal)/count(c_acctbal), min(c_custkey), "
            "max(c_nationkey), "
            "(max(c_nationkey) + min(c_nationkey))/2 c_nationkey "
            "from customer where c_custkey < 100 group by c_nationkey",
            worder,
            True,
        ),
        (
            'TEST_05',
            "select c.c_custkey, c.c_nationkey, n.n_regionkey "
            "from customer as c inner join nation as n "
            "on c.c_nationkey = n.n_nationkey "
            "where n.n_regionkey = 1 and c.c_custkey < 50",
            worder,
            use_percentage,
        ),
        (
            'TEST_06',
            "select c_custkey, c_nationkey, c_acctbal from customer "
            "order by c_nationkey, c_custkey, c_acctbal",
            0,
            use_percentage,
        ),
        (
            'TEST_07',
            "select c_custkey + c_nationkey, c_acctbal from customer "
            "order by 1, 2",
            0,
            use_percentage,
        ),
        (
            'TEST_08',
            "select n1.n_nationkey as supp_nation, "
            "n2.n_nationkey as cust_nation, "
            "l.l_extendedprice * l.l_discount "
            "from supplier as s "
            "inner join lineitem as l on s.s_suppkey = l.l_suppkey "
            "inner join orders as o on o.o_orderkey = l.l_orderkey "
            "inner join customer as c on c.c_custkey = o.o_custkey "
            "inner join nation as n1 on s.s_nationkey = n1.n_nationkey "
            "inner join nation as n2 on c.c_nationkey = n2.n_nationkey "
            "where n1.n_nationkey = 1 and n2.n_nationkey = 2 "
            "and o.o_orderkey < 10000",
            worder,
            use_percentage,
        ),
        (
            'TEST_09',
            "select c_custkey, c_nationkey as nkey from customer "
            "where c_custkey < 0 and c_nationkey >=30",
            worder,
            use_percentage,
        ),
        (
            'TEST_10',
            "select sin(c_acctbal), cos(c_acctbal), asin(c_acctbal), "
            "acos(c_acctbal), ln(c_acctbal), tan(c_acctbal), "
            "atan(c_acctbal), floor(c_acctbal), c_acctbal from customer",
            worder,
            use_percentage,
        ),
        (
            'TEST_11',
            "select n1.n_nationkey as n1key, n2.n_nationkey as n2key, "
            "n1.n_nationkey + n2.n_nationkey "
            "from nation as n1 full outer join nation as n2 "
            "on n1.n_nationkey = n2.n_nationkey + 6 "
            "where n1.n_nationkey < 10 and n1.n_nationkey > 5",
            worder,
            use_percentage,
        ),
        (
            'TEST_12',
            "select count(n1.n_nationkey) as n1key, "
            "count(n2.n_nationkey) as n2key, count(*) as cstar "
            "from nation as n1 full outer join nation as n2 "
            "on n1.n_nationkey = n2.n_nationkey + 6",
            worder,
            use_percentage,
        ),
        (
            'TEST_13',
            "select o_orderkey, o_custkey from orders "
            "where o_orderkey < 10 and o_orderkey >= 1",
            worder,
            use_percentage,
        ),
        (
            # TODO: Change sum/count for avg KC
            'TEST_14',
            "select 100168549 - sum(o_orderkey)/count(o_orderkey), "
            "56410984/sum(o_totalprice), "
            "(123 - 945/max(o_orderkey))/(sum(81619/o_orderkey)/count(81619/o_orderkey)) "
            "from orders where o_orderkey < 50",
            worder,
            True,
        ),
        (
            'TEST_15',
            "select EXTRACT(YEAR FROM l_receiptdate) - "
            "EXTRACT(YEAR FROM l_shipdate) as years_late, "
            "EXTRACT(MONTH FROM l_receiptdate) - "
            "EXTRACT(MONTH FROM l_shipdate) as months_late, "
            "EXTRACT(DAY FROM l_receiptdate) - "
            "EXTRACT(DAY FROM l_shipdate) as days_late "
            "from lineitem where l_shipdate < DATE '1993-01-01'",
            worder,
            use_percentage,
        ),
    )

    banner = '=============================='

    for fileSchemaType in data_types:
        if skip_test(dask_client, nRals, fileSchemaType, queryType):
            continue
        cs.create_tables(bc, hdfs_dir_data_lc, fileSchemaType, tables=tables)

        # Run Query --------------------------------------------------------
        print(banner)
        print(queryType)
        print(banner)

        for test_id, sql, order_flag, percentage_flag in cases:
            runTest.run_query(
                bc,
                drill,
                sql,
                test_id,
                queryType,
                order_flag,
                '',
                acceptable_difference,
                percentage_flag,
                fileSchemaType,
            )
def executionTest():
    """Run the UNION / UNION ALL queries for every configured data source.

    For each entry of ``data_types`` that ``skip_test`` does not reject, the
    listed tables are created and every case below is passed to
    ``runTest.run_query``.  Each case names the engine rule to apply:
    always ``drill``, always ``spark``, or ``spark`` only when the data
    source is ``DataType.ORC`` (``drill`` otherwise).  When
    ``Settings.execution_mode`` equals ``ExecutionMode.GENERATOR`` the loop
    stops after the first data source that was actually exercised.
    """
    tables = ["orders", "nation", "lineitem"]
    data_types = [
        DataType.DASK_CUDF,
        DataType.CUDF,
        DataType.CSV,
        DataType.ORC,
        DataType.PARQUET,
    ]  # TODO json

    # Engine rules used by the case table.
    USE_DRILL = "drill"
    USE_SPARK = "spark"
    SPARK_FOR_ORC = "spark_for_orc"

    # Spark-specific text for TEST_04, forwarded through `query_spark`.
    test_04_spark_sql = (
        "(select o_orderkey, cast(null as int) as keyy, o_totalprice, "
        "cast(null as double) as o_totalprice2, "
        "cast(null as int) as field5, "
        "cast(null as double) as field6 "
        "from orders where o_orderkey < 100 ) "
        "union all "
        "( select o_orderkey + 100.1 as o_orderkey, "
        "o_custkey as keyy, cast(null as double) as o_totalprice, "
        "o_totalprice as o_totalprice2, "
        "cast(null as int) as field5, "
        "cast(null as double) as field6 "
        "from orders where o_orderkey < 300 and o_orderkey >= 200 )"
    )

    # One row per query:
    #   (query id, SQL text, engine rule, order-by column, extra kwargs)
    cases = (
        # ------------------ UNION ALL ---------------------
        (
            "TEST_01",
            "(select o_orderkey, o_custkey from orders "
            "where o_orderkey < 100 ) "
            "union all "
            "( select o_orderkey, o_custkey from orders "
            "where o_orderkey < 300 and o_orderkey >= 200 )",
            USE_DRILL,
            "o_orderkey",
            {},
        ),
        (
            "TEST_02",
            "(select o_orderkey, o_custkey from orders "
            "where o_orderkey < 100 ) "
            "union all "
            "( select o_orderkey, o_custkey from orders "
            "where o_orderkey < 300 and o_orderkey >= 200 ) "
            "order by 2",
            USE_DRILL,
            "o_orderkey",
            {},
        ),
        (
            "TEST_03",
            "(select o_orderkey, o_totalprice as key from orders "
            "where o_orderkey < 100 ) "
            "union all "
            "( select o_orderkey, o_custkey as key from orders "
            "where o_orderkey < 300 and o_orderkey >= 200 )",
            USE_DRILL,
            "o_orderkey",
            {},
        ),
        (
            "TEST_04",
            "(select o_orderkey, null as keyy, o_totalprice, "
            "cast(null as int) as o_totalprice2, null as field5, "
            "null as field6 from orders where o_orderkey < 100 ) "
            "union all "
            "( select o_orderkey + 100.1 as o_orderkey, "
            "o_custkey as keyy, null as o_totalprice, "
            "o_totalprice as o_totalprice2, null as field5, "
            "cast(null as double) as field6 from orders "
            "where o_orderkey < 300 and o_orderkey >= 200 )",
            USE_SPARK,
            "",
            {"query_spark": test_04_spark_sql},
        ),
        (
            "TEST_05",
            "(select o_orderkey, 100.1, o_totalprice, "
            "cast(100 as float), 100, 1.1 "
            "from orders where o_orderkey < 100 ) "
            "union all "
            "( select o_orderkey + 100.1 as o_orderkey, "
            "o_custkey as keyy, 10000, o_totalprice, 101.1,100 "
            "from orders where o_orderkey < 300 and o_orderkey >= 200 )",
            USE_DRILL,
            "o_orderkey",
            {},
        ),
        (
            "TEST_06",
            "(select o_orderkey, o_orderstatus, o_orderstatus "
            "from orders where o_orderkey < 100 ) "
            "union all "
            "( select o_orderkey + 100.1 as o_orderkey, "
            "SUBSTRING(o_orderstatus, 2, 4), 'hello work' "
            "from orders where o_orderkey < 300 and o_orderkey >= 200 )",
            USE_DRILL,
            "o_orderkey",
            {},
        ),
        (
            "TEST_07",
            "(select o_orderkey, o_custkey from orders "
            "where o_orderkey < 100 ) "
            "union all "
            "(select o_orderkey, o_custkey from orders "
            "where o_orderkey < 300 and o_orderkey >= 200) "
            "order by 2",
            USE_DRILL,
            "o_orderkey",
            {},
        ),
        # ------------------ UNION ---------------------
        (
            "TEST_08",
            "(select o_orderkey, o_custkey from orders "
            "where o_orderkey < 100 ) "
            "union "
            "( select o_orderkey, o_custkey from orders "
            "where o_orderkey < 200 and o_orderkey >= 10 )",
            USE_DRILL,
            "o_orderkey",
            {},
        ),
        (
            "TEST_09",
            "(select o_orderkey, o_custkey from orders "
            "where o_orderkey < 60 ) "
            "union "
            "( select o_orderkey, o_custkey from orders "
            "where o_orderkey < 200 and o_orderkey >= 10 ) "
            "order by 2",
            USE_DRILL,
            "o_orderkey",
            {},
        ),
        (
            "TEST_10",
            "(select o_orderkey, o_orderstatus, o_orderstatus "
            "from orders where o_orderkey < 100 ) "
            "union "
            "( select o_orderkey + 100.1 as o_orderkey, "
            "SUBSTRING(o_orderstatus, 2, 4), 'hello work' "
            "from orders where o_orderkey < 300 and o_orderkey >= 5 )",
            USE_DRILL,
            "o_orderkey",
            {},
        ),
        (
            "TEST_11",
            "(select nat1.n_nationkey, nat1.n_name from nation as nat1 "
            "inner join lineitem "
            "on nat1.n_nationkey = mod(l_suppkey, 1010) "
            "where nat1.n_name like 'INDIA' ) "
            "union "
            "( select nat2.n_nationkey, nat2.n_name from nation as nat2 "
            "inner join orders "
            "on nat2.n_nationkey = mod(o_orderkey, 1010) "
            "where nat2.n_name like 'INDIA' )",
            USE_SPARK,
            "",
            {},
        ),
        (
            "TEST_12",
            "select l_returnflag, l_shipdate, l_linestatus from lineitem "
            "where l_orderkey < 100 and l_linenumber < 2 "
            "union all "
            "select l_returnflag, l_shipdate, l_linestatus from lineitem "
            "where l_partkey < 1 and l_orderkey < 2 and l_linenumber < 2",
            SPARK_FOR_ORC,
            "",
            {},
        ),
        (
            "TEST_13",
            "select o_orderpriority as l_returnflag, "
            "o_orderdate as l_shipdate, o_orderstatus as l_linestatus "
            "from orders where o_orderkey < 100 "
            "union all "
            "select l_returnflag, l_shipdate, l_linestatus "
            "from lineitem where l_orderkey = 3",
            SPARK_FOR_ORC,
            "",
            {},
        ),
        (
            "TEST_14",
            "select o_orderdate as d1, o_orderpriority as s1, "
            "o_orderstatus as s2, o_orderkey as l1 "
            "from orders where o_orderkey < 100 "
            "union all "
            "select o_orderdate as d1, o_orderpriority as s1, "
            "o_orderstatus as s2, o_orderkey as l1 "
            "from orders where o_custkey < 100 "
            "union all "
            "select o_orderdate as d1, o_orderpriority as s1, "
            "o_orderstatus as s2, o_orderkey as l1 "
            "from orders where o_orderstatus = 'O' "
            "union all "
            "select o_orderdate as d1, o_orderpriority as s1, "
            "o_orderstatus as s2, o_orderkey as l1 "
            "from orders where o_totalprice < 350",
            SPARK_FOR_ORC,
            "",
            {},
        ),
    )

    banner = "=============================="

    for fileSchemaType in data_types:
        if skip_test(dask_client, nRals, fileSchemaType, queryType):
            continue
        cs.create_tables(bc, dir_data_file, fileSchemaType, tables=tables)

        sort_results = 1
        as_percentage = False
        tolerance = 0.01

        print(banner)
        print(queryType)
        print(banner)

        for test_id, sql, engine_rule, order_col, extra in cases:
            # Short-circuit keeps the ORC comparison limited to the cases
            # that actually depend on the file format.
            wants_spark = engine_rule == USE_SPARK or (
                engine_rule == SPARK_FOR_ORC
                and fileSchemaType == DataType.ORC
            )
            runTest.run_query(
                bc,
                spark if wants_spark else drill,
                sql,
                test_id,
                queryType,
                sort_results,
                order_col,
                tolerance,
                as_percentage,
                fileSchemaType,
                **extra
            )

        if Settings.execution_mode == ExecutionMode.GENERATOR:
            print(banner)
            break
def executionTest():
    """Run the COUNT(DISTINCT ...) queries for every configured data source.

    For each entry of ``data_types`` that ``skip_test`` does not reject, the
    listed tables are created and every case below is passed to
    ``runTest.run_query`` with ``drill`` as the engine argument, using the
    per-case order-by column and acceptable difference.  When
    ``Settings.execution_mode`` equals ``ExecutionMode.GENERATOR`` the loop
    stops after the first data source that was actually exercised.
    """
    tables = [
        "partsupp",
        "lineitem",
        "part",
        "supplier",
        "orders",
        "customer",
        "region",
        "nation",
    ]
    data_types = [
        DataType.DASK_CUDF,
        DataType.CUDF,
        DataType.CSV,
        DataType.ORC,
        DataType.PARQUET,
    ]  # TODO json

    # Acceptable difference used by most cases; TEST_05 and TEST_06 use a
    # tighter literal value.
    default_tolerance = 0.1

    # One row per query:
    #   (query id, SQL text, order-by column, acceptable difference)
    cases = (
        (
            "TEST_01",
            "select count(distinct (n_regionkey + n_nationkey)), "
            "n_regionkey from nation group by n_regionkey",
            "n_regionkey",
            default_tolerance,
        ),
        (
            "TEST_02",
            "select count(distinct o_custkey), o_orderkey "
            "from orders where o_orderkey < 100 "
            "group by o_orderkey, (o_orderkey + o_custkey)",
            "o_orderkey",
            default_tolerance,
        ),
        (
            "TEST_03",
            "select count(distinct(o_orderkey + o_custkey)) as new_col, "
            "sum(o_orderkey), o_custkey "
            "from orders group by o_custkey",
            "o_custkey",
            default_tolerance,
        ),
        (
            "TEST_04",
            "select count(distinct(o_custkey)), avg(o_totalprice), "
            "(o_orderkey + o_custkey) as num "
            "from orders where o_custkey < 100 "
            "group by o_custkey, o_orderkey",
            "",
            default_tolerance,
        ),
        (
            "TEST_05",
            "select count(distinct(o_custkey)), max(o_totalprice), "
            "min(o_totalprice), avg(o_totalprice) "
            "from orders group by o_custkey",
            "",
            0.01,
        ),
        (
            "TEST_06",
            "select n_nationkey, "
            "count(distinct( n_regionkey + n_nationkey))/count(n_nationkey) "
            "from nation group by n_nationkey",
            "",
            0.01,
        ),
        (
            "TEST_07",
            "select count(distinct(o_orderdate)), "
            "count(distinct(o_custkey)), "
            "count(distinct(o_totalprice)), sum(o_orderkey) "
            "from orders group by o_custkey",
            "",
            default_tolerance,
        ),
        (
            "TEST_08",
            "select COUNT(DISTINCT(n.n_nationkey)), AVG(r.r_regionkey) "
            "from nation as n left outer join region as r "
            "on n.n_nationkey = r.r_regionkey",
            "",
            default_tolerance,
        ),
        (
            "TEST_09",
            "select MIN(n.n_nationkey), MAX(r.r_regionkey), "
            "COUNT(DISTINCT(n.n_nationkey + r.r_regionkey)) "
            "from nation as n left outer join region as r "
            "on n.n_nationkey = r.r_regionkey",
            "",
            default_tolerance,
        ),
        (
            "TEST_10",
            "select COUNT(DISTINCT(n1.n_nationkey)) as n1key, "
            "COUNT(DISTINCT(n2.n_nationkey)) as n2key "
            "from nation as n1 full outer join nation as n2 "
            "on n1.n_nationkey = n2.n_regionkey",
            "",
            default_tolerance,
        ),
        (
            "TEST_11",
            "select r.r_regionkey, n.n_nationkey, "
            "COUNT(n.n_nationkey), COUNT(DISTINCT(r.r_regionkey)), "
            "SUM(DISTINCT(n.n_nationkey + r.r_regionkey)) "
            "from nation as n left outer join region as r "
            "on n.n_nationkey = r.r_regionkey "
            "GROUP BY r.r_regionkey, n.n_nationkey",
            "",
            default_tolerance,
        ),
        (
            "TEST_12",
            "select n1.n_regionkey, n2.n_nationkey, "
            "MIN(n1.n_regionkey), MAX(n1.n_regionkey), "
            "AVG(n2.n_nationkey) "
            "from nation as n1 full outer join nation as n2 "
            "on n1.n_nationkey = n2.n_nationkey + 6 "
            "GROUP BY n1.n_regionkey, n2.n_nationkey",
            "",
            default_tolerance,
        ),
        (
            "TEST_13",
            "select COUNT(DISTINCT(n.n_nationkey)), AVG(r.r_regionkey) "
            "from nation as n right outer join region as r "
            "on n.n_nationkey = r.r_regionkey",
            "",
            default_tolerance,
        ),
        (
            "TEST_14",
            "select r.r_regionkey, n.n_nationkey, "
            "COUNT(n.n_nationkey), COUNT(DISTINCT(r.r_regionkey)), "
            "SUM(DISTINCT(n.n_nationkey + r.r_regionkey)) "
            "from nation as n right outer join region as r "
            "on n.n_nationkey = r.r_regionkey "
            "GROUP BY r.r_regionkey, n.n_nationkey",
            "",
            default_tolerance,
        ),
    )

    banner = "=============================="

    for fileSchemaType in data_types:
        if skip_test(dask_client, nRals, fileSchemaType, queryType):
            continue
        cs.create_tables(bc, dir_data_file, fileSchemaType, tables=tables)

        sort_results = 1
        as_percentage = False

        print(banner)
        print(queryType)
        print(banner)

        for test_id, sql, order_col, tolerance in cases:
            runTest.run_query(
                bc,
                drill,
                sql,
                test_id,
                queryType,
                sort_results,
                order_col,
                tolerance,
                as_percentage,
                fileSchemaType,
            )

        if Settings.execution_mode == ExecutionMode.GENERATOR:
            print(banner)
            break