示例#1
0
    def __fetch_results(self):
        """Collect the results of every pending concurrent query.

        Sweeps ``self.config_concurrent`` over and over. For each entry that
        is not yet marked done and whose token ``self.bc.status`` reports as
        truthy, the entry is marked done, its result is fetched with
        ``self.bc.fetch`` and handed to ``runTest.run_query`` through the
        ``blz_result`` keyword. Sweeps repeat back to back (there is no pause
        between passes) until as many entries have been processed as the
        mapping held on entry; the mapping is then replaced by an empty dict.
        """
        expected = len(self.config_concurrent)
        finished = 0
        while finished < expected:
            for token_name, entry in self.config_concurrent.items():
                test_cfg = entry.configTest
                # Reference engine: drill when requested, spark otherwise.
                if test_cfg.compare_with == "drill":
                    engine = self.drill
                else:
                    engine = self.spark

                # Skip entries already handled or whose status is still falsy.
                if entry.done or not self.bc.status(entry.token):
                    continue

                entry.done = True
                finished += 1
                print("==>> Fetch result for ", token_name)
                fetched = self.bc.fetch(entry.token)
                runTest.run_query(
                    self.bc,
                    engine,
                    entry.query,
                    entry.test_name,
                    self.name,
                    test_cfg.apply_order,
                    test_cfg.order_by_col,
                    test_cfg.acceptable_difference,
                    test_cfg.use_percentage,
                    entry.fileSchemaType,
                    print_result=test_cfg.print_result,
                    query_spark=test_cfg.spark_query,
                    comparing=test_cfg.comparing,
                    message_validation=test_cfg.message_validation,
                    blz_result=fetched,
                )

        self.config_concurrent = {}
示例#2
0
def run_queries(bc, dask_client, nRals, drill, dir_data_lc, tables, **kwargs):
    """Run every query yielded by ``samples`` through ``runTest.run_query``.

    The optional keyword arguments ``sql_table_filter_map``,
    ``sql_table_batch_size_map`` and ``sql_connection`` (defaults ``{}``,
    ``{}`` and ``None``) are forwarded to ``samples`` together with
    ``tables`` and the enclosing scope's ``data_types``. ``dir_data_lc`` is
    accepted but not used inside this function.

    When ``Settings.execution_mode`` is ``ExecutionMode.GENERATOR`` the loop
    stops at the first sample whose schema type differs from the previous
    sample's (the first comparison is against ``data_types[0]``).
    """
    filter_map = kwargs.get("sql_table_filter_map", {})
    batch_size_map = kwargs.get("sql_table_batch_size_map", {})
    connection = kwargs.get("sql_connection", None)
    print("######## Starting queries ...########")
    sample_options = dict(
        table_names=tables,
        init_tables=True,
        ds_types=data_types,
        sql_table_filter_map=filter_map,
        sql_table_batch_size_map=batch_size_map,
        sql_connection=connection,
    )
    previous_schema = data_types[0]
    sample_stream = samples(bc, dask_client, nRals, **sample_options)
    for sample_id, sql_text, query_id, fileSchemaType in sample_stream:
        if fileSchemaType != previous_schema:
            # The data source changed; generator mode handles only one.
            if Settings.execution_mode == ExecutionMode.GENERATOR:
                print("==============================")
                # Local flag only; nothing in this function reads it.
                break_flag = True
                break

        print("==>> Run query for sample", sample_id)
        runTest.run_query(
            bc,
            drill,
            sql_text,
            query_id,
            queryType,
            worder,
            "",
            acceptable_difference,
            use_percentage,
            fileSchemaType,
            print_result=True,
        )
        previous_schema = fileSchemaType
示例#3
0
def run_queries(bc, dask_client, nRals, drill, spark, dir_data_lc, tables,
                **kwargs):
    """Run each item yielded by ``sample_items`` via ``runTest.run_query``.

    The optional keyword arguments ``sql_table_filter_map``,
    ``sql_table_batch_size_map`` and ``sql_connection`` (defaults ``{}``,
    ``{}`` and ``None``) are forwarded to ``sample_items`` along with
    ``tables``, ``dir_data_lc`` and the enclosing scope's ``data_types``.

    For every item the matching entry of ``samples`` supplies the query and
    its comparison settings; ``spark`` is used as the engine when the sample
    sets ``use_pyspark``, ``drill`` otherwise. The plan returned by
    ``bc.explain`` is printed before the query is run.

    When ``Settings.execution_mode`` is ``ExecutionMode.GENERATOR`` the loop
    stops at the first item whose schema type differs from the previous
    item's (the first comparison is against ``data_types[0]``).
    """
    filter_map = kwargs.get("sql_table_filter_map", {})
    batch_size_map = kwargs.get("sql_table_batch_size_map", {})
    connection = kwargs.get("sql_connection", None)
    print("######## Starting queries ...########")
    item_options = dict(
        table_names=tables,
        init_tables=True,
        ds_types=data_types,
        sql_table_filter_map=filter_map,
        sql_table_batch_size_map=batch_size_map,
        sql_connection=connection,
        dir_data_lc=dir_data_lc,
    )
    previous_schema = data_types[0]
    item_stream = sample_items(bc, dask_client, nRals, **item_options)
    for sampleUID, sampleId, fileSchemaType, datasource_tables in item_stream:
        if fileSchemaType != previous_schema:
            # The data source changed; generator mode handles only one.
            if Settings.execution_mode == ExecutionMode.GENERATOR:
                print("==============================")
                # Local flag only; nothing in this function reads it.
                break_flag = True
                break

        entry = samples[sampleId]

        # Query mapped to tables with datasource info: order_csv, nation_csv ...
        mapped_query = entry.table_mapper(entry.query, datasource_tables)
        order_flag = entry.worder
        percentage_flag = entry.use_percentage
        tolerance = entry.acceptable_difference
        if entry.use_pyspark:
            engine = spark
        else:
            engine = drill
        # Query mapped to tables without datasource info: order, nation ...
        plain_query = entry.table_mapper(entry.query)

        print("==>> Run query for sample", sampleId)
        print("PLAN:")
        print(bc.explain(mapped_query, True))
        runTest.run_query(
            bc,
            engine,
            mapped_query,
            sampleId,
            queryType,
            order_flag,
            "",
            tolerance,
            percentage_flag,
            fileSchemaType,
            query_spark=plain_query,
            print_result=True,
        )
        previous_schema = fileSchemaType
示例#4
0
    def __executionTest(self):
        """Run every test case in ``self.data`` for each configured data type.

        For each entry of ``self.configLocal.data_types`` that
        ``self.__skip_test`` does not reject, tables are created with
        ``createSchema.create_tables`` and each test case is loaded, checked
        against ``self.__skip_test`` again with its own configuration, and
        passed to ``runTest.run_query``. The plan from ``self.bc.explain`` is
        printed only when the case has an empty ``message_validation``.

        When ``Settings.execution_mode`` is ``ExecutionMode.GENERATOR`` the
        inner loop breaks before running any test case.
        """
        case_names = list(self.data.keys())

        print("######## Starting queries ...########")

        for fileSchemaType in self.configLocal.data_types:
            if self.__skip_test(fileSchemaType, self.configLocal):
                continue

            createSchema.create_tables(
                self.bc,
                self.dir_data_file,
                fileSchemaType,
                tables=list(self.tables),
            )

            for test_name in case_names:
                test_case = self.data[test_name]

                if Settings.execution_mode == ExecutionMode.GENERATOR:
                    print("==============================")
                    # Local flag only; nothing in this method reads it.
                    break_flag = True
                    break

                case_cfg = self.__loadTestCaseConfig(test_name, fileSchemaType)
                if self.__skip_test(fileSchemaType, case_cfg):
                    continue

                sql_text = self.__getQuery(test_case)
                # Reference engine: drill when requested, spark otherwise.
                if case_cfg.compare_with == "drill":
                    engine = self.drill
                else:
                    engine = self.spark

                print("==>> Run query for test case", self.name)
                if case_cfg.message_validation == "":
                    print("PLAN:")
                    print(self.bc.explain(sql_text, True))

                runTest.run_query(
                    self.bc,
                    engine,
                    sql_text,
                    test_name,
                    self.name,
                    case_cfg.apply_order,
                    case_cfg.order_by_col,
                    case_cfg.acceptable_difference,
                    case_cfg.use_percentage,
                    fileSchemaType,
                    print_result=case_cfg.print_result,
                    query_spark=case_cfg.spark_query,
                    comparing=case_cfg.comparing,
                    message_validation=case_cfg.message_validation,
                )
示例#5
0
    def executionTest(queryType):
        """Run the five ROUND queries for every data type that is not skipped.

        ``queryType`` is passed to ``skip_test`` and ``runTest.run_query`` and
        printed (followed by " Tests") in the banner. For each data type the
        tables are created with ``cs.create_tables`` and the queries run in
        order TEST_01..TEST_05. In ``ExecutionMode.GENERATOR`` the loop stops
        after the first data type that was not skipped.
        """
        table_names = ["nation", "region", "customer", "orders", "lineitem"]
        schema_types = [
            DataType.DASK_CUDF,
            DataType.CUDF,
            DataType.CSV,
            DataType.ORC,
            DataType.PARQUET,
        ]  # TODO json

        # Parameter to indicate if it is necessary to order the result sets
        # before comparing them.
        worder = 1
        use_percentage = False
        acceptable_difference = 0

        test_cases = [
            ('TEST_01',
             """select ROUND(orders.o_orderkey), ROUND(orders.o_totalprice) from customer 
                    left outer join orders on customer.c_custkey = orders.o_custkey 
                    where  customer.c_nationkey = 3 and customer.c_custkey < 500"""),
            ('TEST_02',
             """select ROUND(orders.o_totalprice, 2), ROUND(orders.o_totalprice, -2) 
                    from customer left outer join orders on customer.c_custkey = orders.o_custkey 
                    where  customer.c_nationkey = 3 and customer.c_custkey < 500"""),
            ('TEST_03',
             """select customer.c_custkey, orders.o_orderkey, ROUND(orders.o_custkey,0) 
                    from customer left outer join orders on customer.c_custkey = orders.o_custkey 
                    where  customer.c_nationkey = 3 and customer.c_custkey < 500"""),
            # WSM NEED TO REVISIT THIS (TEST_04)
            ('TEST_04',
             """select MAX(ROUND(n1.n_regionkey,3)) from nation as n1 
                    full outer join nation as n2 on n1.n_nationkey = n2.n_nationkey + 6 """),
            ('TEST_05',
             "select ROUND(AVG(o_totalprice)) from orders"),
        ]

        for fileSchemaType in schema_types:
            if skip_test(dask_client, nRals, fileSchemaType, queryType):
                continue
            # Create the tables for this data type.
            cs.create_tables(bc, dir_data_file, fileSchemaType,
                             tables=table_names)

            print('==============================')
            print(queryType + " Tests")
            print('==============================')

            # Run the queries in declaration order.
            for queryId, query in test_cases:
                runTest.run_query(bc, drill, query, queryId, queryType,
                                  worder, '', acceptable_difference,
                                  use_percentage, fileSchemaType)

            if Settings.execution_mode == ExecutionMode.GENERATOR:
                print("==============================")
                break
示例#6
0
    def executionTest():
        """Run the four full-outer-join queries for each data type not skipped.

        For each data type the ``nation`` table is created with
        ``cs.create_tables`` and the queries run in order TEST_01..TEST_04
        through ``runTest.run_query``. ``queryType`` comes from the enclosing
        scope. In ``ExecutionMode.GENERATOR`` the loop stops after the first
        data type that was not skipped.
        """
        table_names = ["nation"]
        schema_types = [
            DataType.DASK_CUDF,
            DataType.CUDF,
            DataType.CSV,
            DataType.ORC,
            DataType.PARQUET,
        ]  # TODO json

        # Parameter to indicate if it is necessary to order the result sets
        # before comparing them.
        worder = 1
        use_percentage = False
        acceptable_difference = 0

        test_cases = [
            ('TEST_01',
             """select n1.n_nationkey as n1key, n2.n_nationkey as n2key, n1.n_nationkey + n2.n_nationkey 
                        from nation as n1 full outer join nation as n2 on n1.n_nationkey = n2.n_nationkey + 6"""),
            ('TEST_02',
             "select n1.n_nationkey as n1key, n2.n_nationkey as n2key, n1.n_nationkey + n2.n_nationkey from nation as n1 full outer join nation as n2 on n1.n_nationkey = n2.n_nationkey + 6 where n1.n_nationkey < 10"),
            ('TEST_03',
             "select n1.n_nationkey as n1key, n2.n_nationkey as n2key, n1.n_nationkey + n2.n_nationkey from nation as n1 full outer join nation as n2 on n1.n_nationkey = n2.n_nationkey + 6 where n1.n_nationkey < 10 and n1.n_nationkey > 5"),
            ('TEST_04',
             """select n1.n_nationkey as n1key, n2.n_nationkey as n2key, n1.n_nationkey + n2.n_nationkey 
                        from nation as n1 full outer join nation as n2 on n1.n_nationkey = n2.n_nationkey + 6 and n1.n_nationkey + 1 = n2.n_nationkey + 7 and n1.n_nationkey + 2 = n2.n_nationkey + 8"""),
        ]

        for fileSchemaType in schema_types:
            if skip_test(dask_client, nRals, fileSchemaType, queryType):
                continue
            # Create the tables for this data type.
            cs.create_tables(bc, dir_data_file, fileSchemaType,
                             tables=table_names)

            print('==============================')
            print(queryType)
            print('==============================')

            # Run the queries in declaration order.
            for queryId, query in test_cases:
                runTest.run_query(bc, drill, query, queryId, queryType,
                                  worder, '', acceptable_difference,
                                  use_percentage, fileSchemaType)

            if Settings.execution_mode == ExecutionMode.GENERATOR:
                print("==============================")
                break
示例#7
0
def fetch_result(bc, tokens, sampleId, drill, query, queryId, fileSchemaType):
    """Fetch and check one sample's result if ``bc.status`` says it is ready.

    Looks up ``tokens[sampleId]``. If ``bc.status`` returns a falsy value for
    that token, nothing else happens and ``False`` is returned. Otherwise the
    result is fetched with ``bc.fetch``, passed to ``runTest.run_query`` via
    the ``blz_result`` keyword, and ``True`` is returned.

    ``queryType``, ``worder``, ``acceptable_difference`` and
    ``use_percentage`` are read from the enclosing scope.
    """
    pending_token = tokens[sampleId]
    if not bc.status(pending_token):
        return False

    print("==>> Fetch result for sample", sampleId)
    fetched = bc.fetch(pending_token)
    runTest.run_query(
        bc,
        drill,
        query,
        queryId,
        queryType,
        worder,
        "",
        acceptable_difference,
        use_percentage,
        fileSchemaType,
        blz_result=fetched,
    )
    return True
    def executionTest():
        """Run the five GROUP BY queries for each data type that is not skipped.

        For each data type the tables are created with ``cs.create_tables``
        and the queries run in order TEST_01..TEST_05 through
        ``runTest.run_query``. TEST_04 passes 0 as the ordering flag; every
        other case passes 1. ``queryType`` comes from the enclosing scope. In
        ``ExecutionMode.GENERATOR`` the loop stops after the first data type
        that was not skipped.
        """
        table_names = ["nation", "region", "customer"]
        schema_types = [
            DataType.DASK_CUDF,
            DataType.CUDF,
            DataType.CSV,
            DataType.ORC,
            DataType.PARQUET,
        ]  # TODO json

        # Parameter to indicate if it is necessary to order the result sets
        # before comparing them.
        worder = 1
        use_percentage = False
        acceptable_difference = 0

        # (query id, ordering flag, SQL text)
        test_cases = [
            ('TEST_01', worder,
             "select n_nationkey, n_regionkey from nation group by n_regionkey, n_nationkey"),
            ('TEST_02', worder,
             "select c_custkey, c_nationkey from customer where c_acctbal < 1000 group by c_nationkey, c_custkey"),
            ('TEST_03', worder,
             "select c.c_custkey, r.r_regionkey, c.c_custkey + r.r_regionkey as addy from customer as c inner join region as r on c.c_nationkey = r.r_regionkey group by r.r_regionkey, c.c_custkey"),
            ('TEST_04', 0,
             "select c_nationkey, c_custkey from customer where c_acctbal < 10000 group by c_nationkey, c_custkey order by c_nationkey desc, c_custkey asc"),
            ('TEST_05', worder,
             "select c.c_custkey, r.r_regionkey, c.c_custkey + r.r_regionkey as addy from customer as c inner join region as r on c.c_nationkey = r.r_regionkey where c.c_acctbal < 1000 group by r.r_regionkey, c.c_custkey order by c.c_custkey desc"),
        ]

        for fileSchemaType in schema_types:
            if skip_test(dask_client, nRals, fileSchemaType, queryType):
                continue
            # Create the tables for this data type.
            cs.create_tables(bc, dir_data_file, fileSchemaType,
                             tables=table_names)

            print('==============================')
            print(queryType)
            print('==============================')

            # Run the queries in declaration order.
            for queryId, order_flag, query in test_cases:
                runTest.run_query(bc, drill, query, queryId, queryType,
                                  order_flag, '', acceptable_difference,
                                  use_percentage, fileSchemaType)

            if Settings.execution_mode == ExecutionMode.GENERATOR:
                print("==============================")
                break
示例#9
0
    def executionTest():
        """Run the four nested-subquery tests for each data type not skipped.

        For each data type the tables are created with ``cs.create_tables``
        and the queries run in order TEST_01..TEST_04 through
        ``runTest.run_query`` with an acceptable difference of 0.01.
        ``queryType`` comes from the enclosing scope. In
        ``ExecutionMode.GENERATOR`` the loop stops after the first data type
        that was not skipped.
        """
        table_names = ["nation", "region", "customer", "orders", "part",
                       "partsupp", "supplier"]
        schema_types = [
            DataType.DASK_CUDF,
            DataType.CUDF,
            DataType.CSV,
            DataType.ORC,
            DataType.PARQUET,
        ]  # TODO json

        # Parameter to indicate if it is necessary to order the result sets
        # before comparing them.
        worder = 1
        use_percentage = False
        acceptable_difference = 0.01

        test_cases = [
            ('TEST_01',
             """select maxPrice, avgSize from
                (select avg(CAST(p_size AS DOUBLE)) as avgSize, max(p_retailprice) as maxPrice, min(p_retailprice) as minPrice from part ) as partAnalysis
                order by maxPrice, avgSize"""),
            ('TEST_02',
             """select custOrders.avgPrice, custOrders.numOrders from customer
                inner join
                (select o_custkey as o_custkey, avg(o_totalprice) as avgPrice, count(o_totalprice) as numOrders from orders
                where o_custkey <= 100 group by o_custkey) as custOrders
                on custOrders.o_custkey = customer.c_custkey
                where customer.c_nationkey <= 5"""),
            ('TEST_03',
             """select partSuppTemp.partKey, partAnalysis.avgSize from
                (select min(p_partkey) as partKey,  avg(CAST(p_size AS DOUBLE)) as avgSize, max(p_retailprice) as maxPrice, min(p_retailprice) as minPrice from part ) as partAnalysis
                inner join (select ps_partkey as partKey, ps_suppkey as suppKey from partsupp where ps_availqty > 2) as partSuppTemp on partAnalysis.partKey = partSuppTemp.partKey
                inner join (select s_suppkey as suppKey from supplier ) as supplierTemp on supplierTemp.suppKey = partSuppTemp.suppKey"""),
            ('TEST_04',
             """select avg(CAST(custKey AS DOUBLE)) from
                (select customer.c_custkey as custKey from
                (select min(o_custkey) as o_custkey from orders ) as tempOrders
                inner join customer on tempOrders.o_custkey = customer.c_custkey
                where customer.c_nationkey > 6) as joinedTables"""),
        ]

        for fileSchemaType in schema_types:
            if skip_test(dask_client, nRals, fileSchemaType, queryType):
                continue
            # Create the tables for this data type.
            cs.create_tables(bc, dir_data_file, fileSchemaType,
                             tables=table_names)

            print('==============================')
            print(queryType)
            print('==============================')

            # Run the queries in declaration order.
            for queryId, query in test_cases:
                runTest.run_query(bc, drill, query, queryId, queryType,
                                  worder, '', acceptable_difference,
                                  use_percentage, fileSchemaType)

            if Settings.execution_mode == ExecutionMode.GENERATOR:
                print("==============================")
                break
示例#10
0
    def executionTest():
        """Run the four ORDER BY queries for each data type that is not skipped.

        For each data type the ``customer`` table is created with
        ``cs.create_tables`` and the queries run in order TEST_01..TEST_04
        through ``runTest.run_query`` with the ordering flag set to 0 and an
        acceptable difference of 0.01. ``queryType`` comes from the enclosing
        scope. In ``ExecutionMode.GENERATOR`` the loop stops after the first
        data type that was not skipped.
        """
        table_names = ["customer"]
        schema_types = [
            DataType.DASK_CUDF,
            DataType.CUDF,
            DataType.CSV,
            DataType.ORC,
            DataType.PARQUET,
        ]  # TODO json

        # Parameter to indicate if it is necessary to order the result sets
        # before comparing them.
        worder = 0
        use_percentage = False
        acceptable_difference = 0.01

        test_cases = [
            ('TEST_01',
             "select c_custkey, c_acctbal from customer order by c_acctbal desc, c_custkey"),
            ('TEST_02',
             "select c_acctbal from customer order by c_acctbal"),
            ('TEST_03',
             "select c_custkey, c_nationkey, c_acctbal from customer order by c_nationkey, c_acctbal, c_custkey desc"),
            ('TEST_04',
             "select c_custkey + c_nationkey, c_acctbal from customer order by 1 desc, 2"),
        ]

        for fileSchemaType in schema_types:
            if skip_test(dask_client, nRals, fileSchemaType, queryType):
                continue
            # Create the tables for this data type.
            cs.create_tables(bc, dir_data_file, fileSchemaType,
                             tables=table_names)

            print('==============================')
            print(queryType)
            print('==============================')

            # Run the queries in declaration order.
            for queryId, query in test_cases:
                runTest.run_query(bc, drill, query, queryId, queryType,
                                  worder, '', acceptable_difference,
                                  use_percentage, fileSchemaType)

            if Settings.execution_mode == ExecutionMode.GENERATOR:
                print("==============================")
                break
示例#11
0
    def executionTest():
        """Run the three LIKE queries for each data type that is not skipped.

        Only ``DataType.CUDF`` is listed. Tables are created from
        ``dir_data_lc`` with ``cs.create_tables`` and the queries run in order
        TEST_01..TEST_03 through ``runTest.run_query`` with an acceptable
        difference of 0.01. ``queryType`` comes from the enclosing scope. In
        ``ExecutionMode.GENERATOR`` the loop stops after the first data type
        that was not skipped.
        """
        table_names = ['supplier', 'lineitem', 'partsupp', 'part']
        schema_types = [DataType.CUDF]  # TODO csv orc parquet json

        # Parameter to indicate if it is necessary to order the result sets
        # before comparing them.
        worder = 1
        use_percentage = False
        acceptable_difference = 0.01

        test_cases = [
            ('TEST_01',
             """select  
                        p_partkey, p_mfgr
                    from part         
                    where p_size = 35 and p_type like 'STEEL%'"""),
            ('TEST_02',
             """select
                        s.s_acctbal, s.s_name, p.p_partkey, p.p_mfgr, s.s_address, s.s_phone, s.s_comment
                    from part p 
                        inner join partsupp ps on ps.ps_partkey = p.p_partkey
                        inner join supplier s on s.s_suppkey = ps.ps_suppkey
                    where
                        p.p_type like '%STEEL'"""),
            ('TEST_03',
             """SELECT 100.00* SUM(CASE WHEN p.p_type LIKE 'PROMO%' THEN l.l_extendedprice*(1-l.l_discount)
                            ELSE 0 END) / SUM(l.l_extendedprice*(1-l.l_discount)) AS promo_revenue
                      FROM lineitem l
                      INNER JOIN part p ON l.l_partkey = p.p_partkey
                      WHERE l.l_shipdate < '1995-09-01'"""),
        ]

        for fileSchemaType in schema_types:
            if skip_test(dask_client, nRals, fileSchemaType, queryType):
                continue
            # Create the tables for this data type.
            cs.create_tables(bc, dir_data_lc, fileSchemaType,
                             tables=table_names)

            print('==============================')
            print(queryType)
            print('==============================')

            # Run the queries in declaration order.
            for queryId, query in test_cases:
                runTest.run_query(bc, drill, query, queryId, queryType,
                                  worder, '', acceptable_difference,
                                  use_percentage, fileSchemaType)

            if Settings.execution_mode == ExecutionMode.GENERATOR:
                print("==============================")
                break
示例#12
0
    def executionTest():
        tables = [
            "nation", "region", "customer", "lineitem", "orders", "supplier"
        ]
        data_types = [
            DataType.DASK_CUDF,
            DataType.CUDF,
            DataType.CSV,
            DataType.ORC,
            DataType.PARQUET,
        ]  # TODO json

        # Create Tables -----------------------------------------------------
        for fileSchemaType in data_types:
            if skip_test(dask_client, nRals, fileSchemaType, queryType):
                continue
            cs.create_tables(bc, dir_data_file, fileSchemaType, tables=tables)

            # Run Query ------------------------------------------------------
            # Parameter to indicate if its necessary to order
            # the resulsets before compare them
            worder = 1
            use_percentage = False
            acceptable_difference = 0

            print("==============================")
            print(queryType)
            print("==============================")

            queryId = "TEST_01"
            query = """select nation.n_nationkey, region.r_regionkey
                    from nation inner join region
                    on region.r_regionkey = nation.n_nationkey"""
            runTest.run_query(
                bc,
                drill,
                query,
                queryId,
                queryType,
                worder,
                "",
                acceptable_difference,
                use_percentage,
                fileSchemaType,
            )

            queryId = "TEST_02"
            query = """select avg(CAST(c.c_custkey AS DOUBLE)),
                        avg(CAST(c.c_nationkey AS DOUBLE)),
                        n.n_regionkey
                    from customer as c inner join nation as n
                    on c.c_nationkey = n.n_nationkey
                    group by n.n_regionkey"""
            runTest.run_query(
                bc,
                drill,
                query,
                queryId,
                queryType,
                worder,
                "",
                0.01,
                use_percentage,
                fileSchemaType,
            )

            queryId = "TEST_03"
            query = """select c.c_custkey, c.c_nationkey, n.n_regionkey
                    from customer as c
                    inner join nation as n
                    on c.c_nationkey = n.n_nationkey
                    where n.n_regionkey = 1
                    and c.c_custkey < 50"""
            runTest.run_query(
                bc,
                drill,
                query,
                queryId,
                queryType,
                worder,
                "",
                acceptable_difference,
                use_percentage,
                fileSchemaType,
            )

            queryId = "TEST_04"
            query = """select avg(CAST(c.c_custkey AS DOUBLE)),
                        avg(c.c_acctbal), n.n_nationkey,
                        r.r_regionkey
                    from customer as c
                    inner join nation as n on c.c_nationkey = n.n_nationkey
                    inner join region as r on r.r_regionkey = n.n_regionkey
                    group by n.n_nationkey, r.r_regionkey"""
            runTest.run_query(
                bc,
                drill,
                query,
                queryId,
                queryType,
                worder,
                "",
                0.01,
                use_percentage,
                fileSchemaType,
            )

            queryId = "TEST_05"
            query = """select n1.n_nationkey as supp_nation,
                        n2.n_nationkey as cust_nation,
                        l.l_extendedprice * l.l_discount
                    from supplier as s
                    inner join lineitem as l
                    on s.s_suppkey = l.l_suppkey
                    inner join orders as o on o.o_orderkey = l.l_orderkey
                    inner join customer as c on c.c_custkey = o.o_custkey
                    inner join nation as n1 on s.s_nationkey = n1.n_nationkey
                    inner join nation as n2 on c.c_nationkey = n2.n_nationkey
                    where n1.n_nationkey = 1
                    and n2.n_nationkey = 2
                    and o.o_orderkey < 10000"""
            runTest.run_query(
                bc,
                drill,
                query,
                queryId,
                queryType,
                worder,
                "",
                0.01,
                use_percentage,
                fileSchemaType,
            )

            queryId = "TEST_06"
            query = """SELECT n.n_nationkey + 1, n.n_regionkey from nation
                    AS n inner join region AS r ON
                    n.n_regionkey = r.r_regionkey"""
            runTest.run_query(
                bc,
                drill,
                query,
                queryId,
                queryType,
                worder,
                "",
                acceptable_difference,
                use_percentage,
                fileSchemaType,
            )

            queryId = "TEST_07"
            query = """SELECT n.n_nationkey + 1, n.n_regionkey from nation
                    AS n INNER JOIN region AS r
                    ON n.n_regionkey = r.r_regionkey
                    and n.n_nationkey = 5"""
            runTest.run_query(
                bc,
                drill,
                query,
                queryId,
                queryType,
                worder,
                "",
                acceptable_difference,
                use_percentage,
                fileSchemaType,
            )

            queryId = "TEST_08"
            query = """select * from nation n1 inner join nation n2
                    on n1.n_nationkey = n2.n_nationkey"""
            runTest.run_query(
                bc,
                drill,
                query,
                queryId,
                queryType,
                worder,
                "",
                acceptable_difference,
                use_percentage,
                fileSchemaType,
            )

            queryId = "TEST_09"
            query = """select n1.n_nationkey, n2.n_nationkey
                    from nation n1 inner join nation n2
                    on n1.n_nationkey = n2.n_nationkey"""
            runTest.run_query(
                bc,
                drill,
                query,
                queryId,
                queryType,
                worder,
                "",
                acceptable_difference,
                use_percentage,
                fileSchemaType,
            )

            queryId = "TEST_10"
            query = """select l.l_orderkey, l.l_linenumber, n.n_nationkey
                    from lineitem as l inner join nation as n
                    on l.l_orderkey = n.n_nationkey"""
            runTest.run_query(
                bc,
                drill,
                query,
                queryId,
                queryType,
                worder,
                "",
                acceptable_difference,
                use_percentage,
                fileSchemaType,
            )

            queryId = "TEST_11"
            query = """select c.c_custkey, c.c_nationkey, n.o_orderkey
                    from customer as c
                    inner join orders as n on c.c_custkey = n.o_custkey
                    where n.o_orderkey < 100"""
            runTest.run_query(
                bc,
                drill,
                query,
                queryId,
                queryType,
                worder,
                "",
                acceptable_difference,
                use_percentage,
                fileSchemaType,
            )

            queryId = "TEST_12"
            query = """select c.c_custkey, c.c_nationkey, o.o_orderkey
                    from customer as c
                    inner join orders as o on c.c_custkey = o.o_custkey
                    inner join nation as n on c.c_nationkey = n.n_nationkey
                    order by c_custkey, o.o_orderkey"""
            runTest.run_query(
                bc,
                drill,
                query,
                queryId,
                queryType,
                0,
                "",
                acceptable_difference,
                use_percentage,
                fileSchemaType,
            )

            #ERROR: Different values GDF and PSV
            # queryId = "TEST_13"
            # query = """select c.c_name, o.o_orderkey, o.o_totalprice,
            #             l.l_partkey, l.l_returnflag
            #         from lineitem as l
            #         inner join orders as o on o.o_orderkey = l.l_orderkey
            #         inner join customer as c on c.c_custkey = o.o_custkey
            #         and l.l_linenumber < 3 and c.c_custkey < 30"""
            # runTest.run_query(
            #     bc,
            #     drill,
            #     query,
            #     queryId,
            #     queryType,
            #     worder,
            #     "",
            #     acceptable_difference,
            #     use_percentage,
            #     fileSchemaType,
            #     print_result=True,
            # )

            #ERROR: Different values GDF and PSV
            # queryId = "TEST_14"
            # query = """select o.o_orderkey, o.o_totalprice, l.l_partkey
            #         from lineitem as l
            #         inner join orders as o on o.o_orderkey = l.l_orderkey * 2
            #         inner join customer as c on c.c_nationkey = o.o_custkey"""
            # runTest.run_query(
            #     bc,
            #     drill,
            #     query,
            #     queryId,
            #     queryType,
            #     worder,
            #     "",
            #     acceptable_difference,
            #     use_percentage,
            #     fileSchemaType,
            #     print_result=True,
            # )

            if Settings.execution_mode == ExecutionMode.GENERATOR:
                print("==============================")
                break
示例#13
0
    def executionTest():
        """Run the CAST query suite once per input format.

        For every format that ``skip_test`` does not reject, the tables are
        created through ``cs.create_tables`` and each query is handed to
        ``runTest.run_query`` together with the engine (``drill`` or
        ``spark``) chosen for that query.
        """
        banner = '=============================='
        table_names = [
            'nation',
            'region',
            'supplier',
            'customer',
            'lineitem',
            'orders',
            'part',
        ]
        # TODO parquet json
        formats = [
            DataType.CUDF,
            DataType.CSV,
            DataType.ORC,
            DataType.PARQUET,
        ]
        tolerance = 0.01

        def check(fmt, engine, test_id, sql, sort_first=1, as_percentage=False):
            # sort_first: whether the result sets have to be ordered before
            # they are compared. The context, suite name, empty order-by
            # column and tolerance are the same for every query here.
            runTest.run_query(
                bc,
                engine,
                sql,
                test_id,
                queryType,
                sort_first,
                '',
                tolerance,
                as_percentage,
                fmt,
            )

        for fmt in formats:
            if skip_test(dask_client, nRals, fmt, queryType):
                continue

            # Create tables for this input format.
            cs.create_tables(bc, dir_data_lc, fmt, tables=table_names)

            print(banner)
            print(queryType)
            print(banner)

            check(
                fmt,
                drill,
                'TEST_01',
                """select p_partkey, p_retailprice, cast(cast(p_retailprice as VARCHAR) as DOUBLE) from part order by p_partkey limit 10""",
            )

            check(
                fmt,
                drill,
                'TEST_02',
                """select CAST(c_custkey as BIGINT), c_acctbal from customer order by c_acctbal desc, c_custkey""",
            )

            check(
                fmt,
                drill,
                'TEST_03',
                """select SUM(c_custkey), CAST(c_custkey as VARCHAR)
                    from customer where c_custkey between 123 and 125 group by c_custkey""",
            )

            check(
                fmt,
                drill,
                'TEST_04',
                """select cast(o_totalprice AS DOUBLE) * o_orderkey from orders where o_orderkey between 990 and 1010""",
            )

            # The query orders its own output, so no re-sorting is requested;
            # the difference is evaluated as a percentage.
            check(
                fmt,
                drill,
                'TEST_05',
                """select o_custkey, o_orderkey, cast(o_custkey AS FLOAT) * o_orderkey from orders where o_custkey between 998 and 1000 order by o_custkey, o_orderkey""",
                sort_first=0,
                as_percentage=True,
            )

            check(
                fmt,
                drill,
                'TEST_06',
                """select cast(c_nationkey AS INTEGER) from customer where c_custkey < 100 and c_nationkey in (19, 20)""",
            )

            check(
                fmt,
                drill,
                'TEST_07',
                """select cast(o_orderkey AS FLOAT) * o_totalprice from orders where o_orderkey < 10 order by o_orderkey""",
                sort_first=0,
                as_percentage=True,
            )

            # TEST_08 and TEST_09 are compared against spark instead of drill.
            check(
                fmt,
                spark,
                'TEST_08',
                """select cast(o_orderkey AS TINYINT) from orders where o_orderkey < 120""",
            )

            check(
                fmt,
                spark,
                'TEST_09',
                """select cast(o_orderkey AS SMALLINT) from orders where o_orderkey < 32000""",
            )

            check(
                fmt,
                drill,
                'TEST_10',
                """select cast(o_totalprice AS INTEGER) * o_orderkey from orders where o_orderkey < 10""",
            )

            # ORC input is compared against spark, everything else against drill.
            timestamp_engine = spark if fmt == DataType.ORC else drill
            check(
                fmt,
                timestamp_engine,
                'TEST_11',
                """select cast(o_orderdate AS TIMESTAMP) from orders where o_orderkey < 10""",
            )

            # TODO: Fix cast(o_orderdate AS DATE) when the input format is ORC
            if fmt != DataType.ORC:
                check(
                    fmt,
                    drill,
                    'TEST_12',
                    """select cast(o_orderdate AS TIMESTAMP) from orders where cast(o_orderdate as TIMESTAMP) 
                        between '1995-01-01' and '1995-01-05'""",
                )
示例#14
0
    def executionTest():
        """Run the WITH-clause (common table expression) queries per format.

        For every format that ``skip_test`` does not reject, the tables are
        created and each query in ``cases`` is passed to
        ``runTest.run_query`` with ``drill`` as the engine. When the
        execution mode is ``ExecutionMode.GENERATOR`` the loop stops after
        the first format that was run.
        """
        banner = "=============================="
        table_names = [
            "nation",
            "region",
            "customer",
            "orders",
            "part",
            "partsupp",
            "supplier",
        ]
        # TODO json
        formats = [
            DataType.DASK_CUDF,
            DataType.CUDF,
            DataType.CSV,
            DataType.ORC,
            DataType.PARQUET,
        ]

        # Whether the result sets have to be ordered before comparing them.
        sort_first = 1
        as_percentage = False
        tolerance = 0

        # (query id, SQL) pairs, executed in this order for every format.
        cases = (
            (
                "TEST_01",
                """with nationTemp as
                    (select n_nationkey, n_regionkey as fkey
                    from nation where n_nationkey > 3
                    order by n_nationkey)
                    select region.r_regionkey, nationTemp.n_nationkey
                    from region inner join nationTemp
                    on region.r_regionkey = nationTemp.fkey""",
            ),
            (
                "TEST_02",
                """with regionTemp as (
                    select r_regionkey from region where r_regionkey > 2 ),
                    nationTemp as(select n_nationkey, n_regionkey as fkey
                    from nation where n_nationkey > 3 order by n_nationkey)
                    select regionTemp.r_regionkey, nationTemp.fkey
                    from regionTemp inner join nationTemp
                    on regionTemp.r_regionkey = nationTemp.fkey""",
            ),
            (
                "TEST_03",
                """with ordersTemp as (
                    select min(o_orderkey) as priorityKey, o_custkey
                    from orders group by o_custkey
                    ), ordersjoin as( select orders.o_custkey from orders
                    inner join ordersTemp
                    on ordersTemp.priorityKey = orders.o_orderkey)
                    select customer.c_custkey, customer.c_nationkey
                    from customer inner join ordersjoin
                    on ordersjoin.o_custkey =  customer.c_custkey""",
            ),
            # TEST_04 is currently disabled:
            #     with ordersTemp as (
            #         select min(orders.o_orderkey) as priorityKey,
            #         o_custkey from orders group by o_custkey
            #     ), ordersjoin as(
            #         select orders.o_orderkey, orders.o_custkey
            #         / (orders.o_custkey + 1) as o_custkey,
            #         (ordersTemp.priorityKey + 1) as priorityKey from orders
            #         inner join ordersTemp on
            #         (ordersTemp.priorityKey = orders.o_orderkey)
            #     )
            #     select (customer.c_custkey + 1)
            #     / (customer.c_custkey - customer.c_custkey + 1) from customer
            #     inner join ordersjoin
            #     on ordersjoin.o_custkey = customer.c_custkey
            #     where (customer.c_custkey > 1 or customer.c_custkey < 100)
        )

        for fmt in formats:
            if skip_test(dask_client, nRals, fmt, queryType):
                continue

            # Create tables for this input format.
            cs.create_tables(bc, dir_data_file, fmt, tables=table_names)

            print(banner)
            print(queryType)
            print(banner)

            for test_id, sql in cases:
                runTest.run_query(
                    bc,
                    drill,
                    sql,
                    test_id,
                    queryType,
                    sort_first,
                    "",
                    tolerance,
                    as_percentage,
                    fmt,
                )

            if Settings.execution_mode == ExecutionMode.GENERATOR:
                print(banner)
                break
示例#15
0
    def executionTest():
        """Run the TO_DATE / TO_TIMESTAMP queries against ORC input.

        Each case carries two SQL texts: the one submitted as ``query`` and
        a variant written with ``string`` casts and ``yyyy-MM-dd``-style
        patterns that is forwarded as ``query_spark``. ``spark`` is the
        engine passed to ``runTest.run_query`` for every case.
        """
        banner = "=============================="
        table_names = ["orders", "lineitem"]
        formats = [DataType.ORC]

        # Whether the result sets have to be ordered before comparing them.
        sort_first = 1
        as_percentage = False
        tolerance = 0.01

        # (query id, query, query_spark) triples, executed in this order.
        cases = (
            (
                "TEST_01",
                """select TO_DATE(cast(o_orderdate as varchar), '%Y-%m-%d %H:%M:%S') from orders""",
                """select TO_DATE(cast(o_orderdate as string), 'yyyy-MM-dd HH:mm:ss') from orders""",
            ),
            (
                "TEST_02",
                """select TO_TIMESTAMP(cast(o_orderdate as varchar), '%Y-%m-%d %H:%M:%S') from orders""",
                """select TO_TIMESTAMP(cast(o_orderdate as string), 'yyyy-MM-dd HH:mm:ss') from orders""",
            ),
            (
                "TEST_03",
                """select
                        TO_DATE(
                        substring(cast(l_shipdate as varchar), 1, 4) || '|' ||
                        substring(cast(l_commitdate as varchar), 6, 2) || '|' ||
                        '13',
                        '%Y|%m|%d')
                        from lineitem""",
                """select
                            TO_DATE(
                            substring(cast(l_shipdate as string), 1, 4) || '|' ||
                            substring(cast(l_commitdate as string), 6, 2) || '|' ||
                            '13',
                            'yyyy|MM|dd')
                            from lineitem""",
            ),
            (
                "TEST_04",
                """select
                        TO_TIMESTAMP(
                        substring(cast(l_shipdate as varchar), 1, 4) || '|' ||
                        substring(cast(l_commitdate as varchar), 6, 2) || '|' ||
                        '13',
                        '%Y|%m|%d')
                        from lineitem""",
                """select
                            TO_TIMESTAMP(
                            substring(cast(l_shipdate as string), 1, 4) || '|' ||
                            substring(cast(l_commitdate as string), 6, 2) || '|' ||
                            '13',
                            'yyyy|MM|dd')
                            from lineitem""",
            ),
        )

        for fmt in formats:
            if skip_test(dask_client, nRals, fmt, queryType):
                continue

            # Create tables for this input format.
            cs.create_tables(bc, dir_data_lc, fmt, tables=table_names)

            print(banner)
            print(queryType)
            print(banner)

            for test_id, sql, spark_sql in cases:
                runTest.run_query(
                    bc,
                    spark,
                    sql,
                    test_id,
                    queryType,
                    sort_first,
                    "",
                    tolerance,
                    as_percentage,
                    fmt,
                    query_spark=spark_sql,
                )
示例#16
0
    def executionTest(queryType):
        """Run a mixed selection of queries from several suites per format.

        A context is obtained from ``init_context()``. Then, for every input
        format that ``skip_test`` does not reject, the tables are created and
        the queries below are handed to ``runTest.run_query`` with ``drill``
        as the engine. Many of the calls are commented out, so the
        ``queryId``/``query`` assignments directly above them currently have
        no effect.

        Args:
            queryType: Suite name passed to ``skip_test``. It is reassigned
                inside the loop body for every section, so once one format
                has run, later ``skip_test`` calls receive the most recently
                assigned section name instead of the caller's value.
                NOTE(review): confirm whether that is intended.
        """
        tables = cs.tpchTables
        data_types = [
            DataType.DASK_CUDF,
            DataType.CUDF,
            DataType.CSV,
            DataType.ORC,
            DataType.PARQUET,
        ]  # TODO json

        bc, dask_client = init_context()

        # Create Tables -----------------------------------------------------
        for fileSchemaType in data_types:
            if skip_test(dask_client, nRals, fileSchemaType, queryType):
                continue
            cs.create_tables(bc, dir_data_file, fileSchemaType, tables)

            # Run Query ------------------------------------------------------
            # worder: whether the result sets have to be ordered before they
            # are compared.
            worder = 1
            use_percentage = False
            acceptable_difference = 0.01

            queryType = 'Aggregations without group by Test'

            print('==============================')
            print(queryType)
            print('==============================')

            queryId = 'TEST_07'
            query = "select COUNT(n1.n_nationkey) as n1key, COUNT(DISTINCT(n2.n_nationkey +  n1.n_nationkey)) as n2key from nation as n1 full outer join nation as n2 on n1.n_nationkey = n2.n_nationkey + 10"
            #runTest.run_query(bc, drill, query, queryId, queryType, worder, '', acceptable_difference, use_percentage, fileSchemaType)

            queryType = 'Coalesce Test'

            print('==============================')
            print(queryType)
            print('==============================')

            queryId = 'TEST_02'
            query = "select COALESCE(orders.o_orderkey, 100), COALESCE(orders.o_totalprice, 0.01) from customer left outer join orders on customer.c_custkey = orders.o_custkey where  customer.c_nationkey = 3 and customer.c_custkey < 500"
            #runTest.run_query(bc, drill, query, queryId, queryType, worder, '', acceptable_difference, use_percentage, fileSchemaType)

            queryId = 'TEST_03'
            query = "select COALESCE(orders.o_orderkey, customer.c_custkey), COALESCE(orders.o_totalprice, customer.c_acctbal) from customer left outer join orders on customer.c_custkey = orders.o_custkey where  customer.c_nationkey = 3 and customer.c_custkey < 500"
            #runTest.run_query(bc, drill, query, queryId, queryType, worder, '', acceptable_difference, use_percentage, fileSchemaType)

            queryId = 'TEST_05'
            query = "select COUNT(DISTINCT(COALESCE(n1.n_regionkey,32))), AVG(COALESCE(n1.n_regionkey,32)) from nation as n1 full outer join nation as n2 on n1.n_nationkey = n2.n_nationkey + 6 "
            #runTest.run_query(bc, drill, query, queryId, queryType, worder, '', acceptable_difference, use_percentage, fileSchemaType) #WSM NEED TO REVISIT THIS

            queryId = 'TEST_06'
            query = "select SUM(COALESCE(n2.n_nationkey, 100)), COUNT(DISTINCT(COALESCE(n1.n_nationkey,32))), n2.n_regionkey as n1key from nation as n1 full outer join nation as n2 on n1.n_nationkey = n2.n_nationkey + 6 GROUP BY n2.n_regionkey"
            #runTest.run_query(bc, drill, query, queryId, queryType, worder, '', acceptable_difference, use_percentage, fileSchemaType) #WSM NEED TO REVISIT THIS

            queryId = 'TEST_07'
            query = "select MIN(COALESCE(n.n_nationkey, r.r_regionkey)), MAX(COALESCE(n.n_nationkey, 8)) from nation as n left outer join region as r on n.n_nationkey = r.r_regionkey"
            #runTest.run_query(bc, drill, query, queryId, queryType, worder, '', acceptable_difference, use_percentage, fileSchemaType)

            queryId = 'TEST_08'
            query = "select AVG(COALESCE(n.n_nationkey, r.r_regionkey)), MAX(COALESCE(n.n_nationkey, 8)) , COUNT(COALESCE(n.n_nationkey, 12)),  n.n_nationkey from nation as n left outer join region as r on n.n_nationkey = r.r_regionkey GROUP BY n.n_nationkey"
            #runTest.run_query(bc, drill, query, queryId, queryType, worder, '', acceptable_difference, use_percentage, fileSchemaType)

            queryId = 'TEST_09'
            query = "select SUM(COALESCE(n2.n_nationkey, 100)), COUNT(DISTINCT(COALESCE(n1.n_nationkey,32))), COALESCE(n2.n_regionkey, 100) as n1key from nation as n1 full outer join nation as n2 on n1.n_nationkey = n2.n_nationkey + 6 GROUP BY COALESCE(n2.n_regionkey, 100)"
            #runTest.run_query(bc, drill, query, queryId, queryType, worder, '', acceptable_difference, use_percentage, fileSchemaType) #WSM NEED TO REVISIT THIS

            queryType = 'Commom Table Expressions Test'

            print('==============================')
            print(queryType)
            print('==============================')

            queryId = 'TEST_04'
            query = """with ordersTemp as ( 
                select min(orders.o_orderkey) as priorityKey, o_custkey from orders group by o_custkey 
            ), ordersjoin as(
                select orders.o_orderkey, orders.o_custkey/(orders.o_custkey + 1) as o_custkey, (ordersTemp.priorityKey + 1) 
                as priorityKey from orders inner join ordersTemp on ( ordersTemp.priorityKey = orders.o_orderkey)
            ) 
            select (customer.c_custkey + 1)/(customer.c_custkey - customer.c_custkey + 1) from customer 
            inner join ordersjoin on ordersjoin.o_custkey =  customer.c_custkey 
            where (customer.c_custkey > 1 or customer.c_custkey < 100)"""
            runTest.run_query(bc, drill, query, queryId, queryType, worder, '', acceptable_difference, use_percentage, fileSchemaType) #WSM NEED TO REVISIT THIS

            queryType = 'Count Distinc Test'

            print('==============================')
            print(queryType)
            print('==============================')

            queryId = 'TEST_07'
            query = "select count(distinct(o_custkey)), count(distinct(o_totalprice)),  sum(o_orderkey) from orders group by o_custkey" #count(distinct(o_orderdate)),
            #runTest.run_query(bc, drill, query, queryId, queryType, worder, '', acceptable_difference, use_percentage, fileSchemaType)

            queryId = 'TEST_08'
            query = "select COUNT(DISTINCT(n.n_nationkey)), AVG(r.r_regionkey) from nation as n left outer join region as r on n.n_nationkey = r.r_regionkey"
            #runTest.run_query(bc, drill, query, queryId, queryType, worder, '', 0.01, use_percentage, fileSchemaType)

            queryId = 'TEST_09'
            query = "select MIN(n.n_nationkey), MAX(r.r_regionkey), COUNT(DISTINCT(n.n_nationkey + r.r_regionkey)) from nation as n left outer join region as r on n.n_nationkey = r.r_regionkey"
            #runTest.run_query(bc, drill, query, queryId, queryType, worder, '', acceptable_difference, use_percentage, fileSchemaType)

            queryId = 'TEST_10'
            query = "select COUNT(DISTINCT(n1.n_nationkey)) as n1key, COUNT(DISTINCT(n2.n_nationkey)) as n2key from nation as n1 full outer join nation as n2 on n1.n_nationkey = n2.n_regionkey"
            #runTest.run_query(bc, drill, query, queryId, queryType, worder, '', acceptable_difference, use_percentage, fileSchemaType)

            queryId = 'TEST_11'
            query = "select r.r_regionkey, n.n_nationkey, COUNT(n.n_nationkey), COUNT(DISTINCT(r.r_regionkey)), SUM(DISTINCT(n.n_nationkey + r.r_regionkey)) from nation as n left outer join region as r on n.n_nationkey = r.r_regionkey GROUP BY r.r_regionkey, n.n_nationkey"
            #runTest.run_query(bc, drill, query, queryId, queryType, worder, '', acceptable_difference, use_percentage, fileSchemaType)

            queryId = 'TEST_12'
            query = "select n1.n_regionkey, n2.n_nationkey,  MIN(n1.n_regionkey), MAX(n1.n_regionkey), AVG(n2.n_nationkey) from nation as n1 full outer join nation as n2 on n1.n_nationkey = n2.n_nationkey + 6 GROUP BY n1.n_regionkey, n2.n_nationkey"
            #runTest.run_query(bc, drill, query, queryId, queryType, worder, '', acceptable_difference, use_percentage, fileSchemaType)

            queryType = 'Count without group by Test'

            print('==============================')
            print(queryType)
            print('==============================')

            queryId = 'TEST_01'
            query = "select count(*), count(n_nationkey) from nation"
            #runTest.run_query(bc, drill, query, queryId, queryType, worder, '', acceptable_difference, use_percentage, fileSchemaType)

            queryId = 'TEST_02'
            query = "select count(n_nationkey), count(*) from nation group by n_nationkey"
            #runTest.run_query(bc, drill, query, queryId, queryType, worder, '', acceptable_difference, use_percentage, fileSchemaType)

            queryType = 'Predicates with nulls'

            print('==============================')
            print(queryType)
            print('==============================')

            queryId = 'TEST_06'
            query = """select COUNT(n.n_nationkey), AVG(r.r_regionkey) 
            from nation as n left outer join region as r on n.n_nationkey = r.r_regionkey 
            WHERE n.n_regionkey IS NULL"""
            # The input format is passed positionally, as in every other call
            # in this function, before the print_result keyword.
            runTest.run_query(bc, drill, query, queryId, queryType, worder, '', acceptable_difference, use_percentage, fileSchemaType, print_result = True)

            queryId = 'TEST_07'
            query = """select n.n_nationkey, n.n_name, r.r_regionkey, r.r_name 
            from nation as n left outer join region as r on n.n_nationkey = r.r_regionkey 
            WHERE r.r_name IS NULL"""
            runTest.run_query(bc, drill, query, queryId, queryType, worder, '', acceptable_difference, use_percentage, fileSchemaType)

            queryId = 'TEST_08' # Core dump when the query starts
            query = """select n.n_nationkey, n.n_name, r.r_regionkey, r.r_name 
            from nation as n left outer join region as r on n.n_nationkey = r.r_regionkey 
            WHERE n.n_name IS NOT NULL"""
            #runTest.run_query(bc, drill, query, queryId, queryType, worder, '', acceptable_difference, use_percentage, fileSchemaType)

            queryType = 'Single Node From Local Test'

            queryId = 'TEST_01'
            query = "select count(*), count(n_nationkey) from nation"
            #runTest.run_query(bc, drill, query, queryId, queryType, worder, '', acceptable_difference, use_percentage, fileSchemaType)

            queryId = 'TEST_02'
            query = "select count(n_nationkey), count(*) from nation group by n_nationkey"
            #runTest.run_query(bc, drill, query, queryId, queryType, worder, '', acceptable_difference, use_percentage, fileSchemaType)

            # NOTE(review): this section repeats the 'Predicates with nulls'
            # section above, so TEST_06 and TEST_07 run twice per format.
            # It looks like a copy/paste leftover - confirm before removing.
            queryType = 'Predicates with nulls'

            print('==============================')
            print(queryType)
            print('==============================')

            queryId = 'TEST_06'
            query = """select COUNT(n.n_nationkey), AVG(r.r_regionkey) 
            from nation as n left outer join region as r on n.n_nationkey = r.r_regionkey 
            WHERE n.n_regionkey IS NULL"""
            # The input format is passed positionally before print_result.
            runTest.run_query(bc, drill, query, queryId, queryType, worder, '', acceptable_difference, use_percentage, fileSchemaType, print_result = True)

            queryId = 'TEST_07'
            query = """select n.n_nationkey, n.n_name, r.r_regionkey, r.r_name 
            from nation as n left outer join region as r on n.n_nationkey = r.r_regionkey 
            WHERE r.r_name IS NULL"""
            runTest.run_query(bc, drill, query, queryId, queryType, worder, '', acceptable_difference, use_percentage, fileSchemaType)

            queryId = 'TEST_08' # Core dump when the query starts
            query = """select n.n_nationkey, n.n_name, r.r_regionkey, r.r_name 
            from nation as n left outer join region as r on n.n_nationkey = r.r_regionkey 
            WHERE n.n_name IS NOT NULL"""
            #runTest.run_query(bc, drill, query, queryId, queryType, worder, '', acceptable_difference, use_percentage, fileSchemaType)

            queryType = 'Single Node From Local Test'

            print('==============================')
            print(queryType)
            print('==============================')

            queryId = 'TEST_04'
            query = """select count(c_custkey), sum(c_acctbal), avg(c_acctbal), min(c_custkey), max(c_nationkey), (max(c_nationkey) + min(c_nationkey))/2 
                    c_nationkey from customer where c_custkey < 100 group by c_nationkey"""
            #runTest.run_query(bc, drill, query, queryId, queryType, worder, '', acceptable_difference, use_percentage, fileSchemaType)

            queryType = 'Tables From Pandas Test'

            print('==============================')
            print(queryType)
            print('==============================')

            queryId = 'TEST_04'
            query = "select count(c_custkey), sum(c_acctbal), avg(c_acctbal), min(c_custkey), max(c_nationkey), (max(c_nationkey) + min(c_nationkey))/2 c_nationkey from customer where c_custkey < 100 group by c_nationkey"
            #runTest.run_query(bc, drill, query, queryId, queryType, worder, '', 0.01, use_percentage, fileSchemaType)

            queryType = 'Union Test'

            print('==============================')
            print(queryType)
            print('==============================')

            queryId = 'TEST_03'
            query = "(select o_orderkey, o_totalprice as key from orders where o_orderkey < 100) union all (select o_orderkey, o_custkey as key from orders where o_orderkey < 300 and o_orderkey >= 200)"
            runTest.run_query(bc, drill, query, queryId, queryType, worder, '', acceptable_difference, use_percentage, fileSchemaType)

            queryType = 'Where clause Test'

            print('==============================')
            print(queryType)
            print('==============================')

            # NOTE(review): 'TEST_10' is used for three different queries in
            # this section; confirm whether distinct ids were intended.
            queryId = 'TEST_10'
            query = "select c_custkey, c_nationkey as nkey from customer where -c_nationkey + c_acctbal > 750.3"
            runTest.run_query(bc, drill, query, queryId, queryType, worder, '', acceptable_difference, use_percentage, fileSchemaType)

            queryId = 'TEST_11'
            query = "select c_custkey, c_nationkey as nkey from customer where -c_nationkey + c_acctbal > 750"
            runTest.run_query(bc, drill, query, queryId, queryType, worder, '', acceptable_difference, use_percentage, fileSchemaType)

            queryId = 'TEST_09'
            query = """select o_orderkey as okey, o_custkey as ckey, o_orderdate as odate from orders 
                       where o_orderstatus = 'O' and o_orderpriority = '1-URGENT' order by okey"""
            runTest.run_query(bc, drill, query, queryId, queryType, worder, '', acceptable_difference, use_percentage, fileSchemaType)

            queryId = 'TEST_10'
            query = """select max(o_totalprice) as max_price, min(o_orderdate) as min_orderdate from orders 
                       where o_orderdate = '1998-08-01'"""
            runTest.run_query(bc, drill, query, queryId, queryType, worder, '', acceptable_difference, use_percentage, fileSchemaType)

            queryId = 'TEST_10'
            query = """select max(o_totalprice) as max_price, min(o_orderdate) as min_orderdate from orders 
                       where o_orderdate > '1998-08-01'"""
            # The input format is passed positionally before print_result.
            runTest.run_query(bc, drill, query, queryId, queryType, worder, '', acceptable_difference, use_percentage, fileSchemaType, print_result = True)
      
            queryType = 'New Queries'    
               
            print('==============================')
            print(queryType)
            print('==============================')
            
            queryId = 'TEST_12'
            query = "select count(n1.n_nationkey) as n1key, count(n2.n_nationkey) as n2key, count(*) as cstar from nation as n1 full outer join nation as n2 on n1.n_nationkey = n2.n_nationkey + 6"
            runTest.run_query(bc, drill, query, queryId, queryType, worder, '', acceptable_difference, use_percentage, fileSchemaType) 
                  
            print('==============================')
            print(queryType)
            print('==============================')
     
            queryId = 'TEST_04'
            query = """select count(c_custkey), sum(c_acctbal), avg(c_acctbal), min(c_custkey), max(c_nationkey), (max(c_nationkey) + min(c_nationkey))/2 
                    c_nationkey from customer where c_custkey < 100 group by c_nationkey"""
            #runTest.run_query(bc, drill, query, queryId, queryType, worder, '', acceptable_difference, use_percentage, fileSchemaType)
     
            queryType = 'Tables From Pandas Test'    
               
            print('==============================')
            print(queryType)
            print('==============================')
             
            queryId = 'TEST_04'
            query = "select count(c_custkey), sum(c_acctbal), avg(c_acctbal), min(c_custkey), max(c_nationkey), (max(c_nationkey) + min(c_nationkey))/2 c_nationkey from customer where c_custkey < 100 group by c_nationkey"
            #runTest.run_query(bc, drill, query, queryId, queryType, worder, '', 0.01, use_percentage, fileSchemaType)
             
            queryType = 'Union Test'    
               
            print('==============================')
            print(queryType)
            print('==============================')
               
            queryId = 'TEST_03'
            query = "(select o_orderkey, o_totalprice as key from orders where o_orderkey < 100) union all (select o_orderkey, o_custkey as key from orders where o_orderkey < 300 and o_orderkey >= 200)"
            runTest.run_query(bc, drill, query, queryId, queryType, worder, '', acceptable_difference, use_percentage, fileSchemaType)
     
            queryType = 'Where clause Test'    
               
            print('==============================')
            print(queryType)
            print('==============================')
               
            queryId = 'TEST_10'
            query = "select c_custkey, c_nationkey as nkey from customer where -c_nationkey + c_acctbal > 750.3"
            runTest.run_query(bc, drill, query, queryId, queryType, worder, '', acceptable_difference, use_percentage, fileSchemaType)    
               
            queryId = 'TEST_11'
            query = "select c_custkey, c_nationkey as nkey from customer where -c_nationkey + c_acctbal > 750"
            runTest.run_query(bc, drill, query, queryId, queryType, worder, '', acceptable_difference, use_percentage, fileSchemaType) 
            
            queryType = 'New Queries'    
               
            print('==============================')
            print(queryType)
            print('==============================')
            
            queryId = 'TEST_12'
            query = "select count(n1.n_nationkey) as n1key, count(n2.n_nationkey) as n2key, count(*) as cstar from nation as n1 full outer join nation as n2 on n1.n_nationkey = n2.n_nationkey + 6"
            runTest.run_query(bc, drill, query, queryId, queryType, worder, '', acceptable_difference, use_percentage, fileSchemaType)       
            
            queryType = 'Concat Test'    
               
            print('==============================')
            print(queryType)
            print('==============================')
            
            queryId = 'TEST_09'        
            query = """select o.o_orderkey, c.c_name || '-' || (c.c_custkey + 1) , o.o_orderstatus from orders o 
                       inner join customer c on o.o_custkey = c.c_custkey
                       where c.c_custkey < 20"""

            queryId = 'TEST_04'        
            query = """select c_custkey, SUBSTRING(c_name, 1, 8) from customer 
                    where c_name between 'Customer#000000009' and  'Customer#0000000011'"""
            runTest.run_query(bc, drill, query, queryId, queryType, worder, '', acceptable_difference, use_percentage, fileSchemaType)
示例#17
0
    def executionTest():
        """Run the SUBSTRING query suite once for every listed data type.

        For each entry of ``data_types`` that ``skip_test`` does not reject,
        the ``partsupp``, ``customer`` and ``nation`` tables are created with
        ``cs.create_tables`` and queries TEST_01..TEST_08 are passed, in
        order, to ``runTest.run_query`` together with ``drill``.

        ``bc``, ``drill``, ``dask_client``, ``nRals``, ``dir_data_file`` and
        ``queryType`` are not defined here; they come from the surrounding
        scope. When ``Settings.execution_mode`` is
        ``ExecutionMode.GENERATOR`` the loop stops after the first data type
        that was not skipped.
        """
        tables = ["partsupp", "customer", "nation"]
        data_types = [
            DataType.DASK_CUDF,
            DataType.CUDF,
            DataType.CSV,
            DataType.ORC,
            DataType.PARQUET,
        ]  # TODO json

        # Settings shared by every run_query call below.
        worder = 1  # whether the result sets must be ordered before comparing
        use_percentage = False
        acceptable_difference = 0.01

        banner = '=============================='

        # (query id, SQL text) pairs, executed in this order.
        substring_cases = [
            ('TEST_01',
             """select SUBSTRING(CAST(ps_partkey as VARCHAR),1,1), ps_availqty from partsupp 
                    where ps_availqty > 7000 and ps_supplycost > 700 order by ps_partkey, ps_availqty limit 50"""),
            ('TEST_02',
             """select c_custkey, c_name from customer where SUBSTRING(c_name,1,17) = 'Customer#00000000'"""),
            ('TEST_03',
             """select c_custkey, SUBSTRING(c_name, 1, 8) from customer 
                    where c_name = 'Customer#000000009'"""),
            ('TEST_04',
             """select * from nation where SUBSTRING(n_name,1,1) = 'I'"""),
            ('TEST_05',
             """select c_custkey, c_name, SUBSTRING(c_name,1,1), SUBSTRING(c_name,2,1), SUBSTRING(c_name,1,2), 
                              SUBSTRING(c_name,2,2) from customer where c_custkey < 20"""),
            ('TEST_06',
             """select c.c_custkey, SUBSTRING(c.c_name, 10, 18), CAST(SUBSTRING(c.c_name, 10, 18) as INT), 
                            CAST(SUBSTRING(c.c_name, 10, 18) as INT) + 1 from customer c
                        where c.c_custkey < 50"""),
            ('TEST_07',
             """select c.c_custkey, SUBSTRING(c.c_name, 1, 8), SUBSTRING(c.c_name, 10, 18) || '**' 
                        from customer c where c.c_custkey < 0"""),
            # NOTE(review): SUBSTRING(n.n1, 1,7) is compared with the
            # 8-character literal 'Customer' -- confirm that is intended.
            ('TEST_08',
             """select * from (
                            select c.c_custkey, SUBSTRING(c.c_name, 1, 8) as n1, SUBSTRING(c.c_name, 10, 18) || '**' as n2
                            from customer c where c.c_custkey < 50
                     ) as n where SUBSTRING(n.n1, 1,7) = 'Customer'"""),
        ]

        for fileSchemaType in data_types:
            if skip_test(dask_client, nRals, fileSchemaType, queryType):
                continue

            # Create the tables for this data type.
            cs.create_tables(bc, dir_data_file, fileSchemaType, tables=tables)

            print(banner)
            print(queryType)
            print(banner)

            # Run every query against the freshly created tables.
            for case_id, sql in substring_cases:
                runTest.run_query(
                    bc,
                    drill,
                    sql,
                    case_id,
                    queryType,
                    worder,
                    '',
                    acceptable_difference,
                    use_percentage,
                    fileSchemaType,
                )

            if Settings.execution_mode == ExecutionMode.GENERATOR:
                print("==============================")
                break
示例#18
0
    def executionTest():
        """Run the string-concatenation (``||``) queries for each data type.

        For each entry of ``data_types`` that ``skip_test`` does not reject,
        the eight tables listed in ``tables`` are created with
        ``cs.create_tables`` and queries TEST_01..TEST_11 are passed, in
        order, to ``runTest.run_query`` together with ``drill``.

        ``bc``, ``drill``, ``dask_client``, ``nRals``, ``dir_data_file`` and
        ``queryType`` are not defined here; they come from the surrounding
        scope. When ``Settings.execution_mode`` is
        ``ExecutionMode.GENERATOR`` the loop stops after the first data type
        that was not skipped.
        """
        tables = [
            "partsupp", "lineitem", "part", "supplier",
            "orders", "customer", "region", "nation",
        ]
        data_types = [
            DataType.DASK_CUDF, DataType.CUDF, DataType.CSV, DataType.ORC,
            DataType.PARQUET
        ]  # TODO json

        # Settings shared by every run_query call below.
        # worder tells whether the result sets must be ordered before
        # they are compared.
        worder = 1
        use_percentage = False
        acceptable_difference = 0.01

        separator = "=============================="

        # (query id, SQL text) pairs, executed in this order.
        concat_cases = (
            ("TEST_01",
             """select c_mktsegment || ': ' || c_custkey || ' - ' ||
                    c_name from customer
                    order by c_custkey, c_mktsegment limit 50"""),
            ("TEST_02",
             """select r.r_name || ' ' || n.n_name from region r
                        inner join nation n
                        on n.n_regionkey = r.r_regionkey
                        order by r.r_name"""),
            ("TEST_03",
             """select c.c_name || ' ' || o.o_orderkey, o.o_orderstatus
                    from orders o
                    inner join customer c on o.o_custkey = c.c_custkey
                    where c.c_custkey < 10"""),
            ("TEST_04",
             """select c.c_name, o.o_orderkey, o.o_orderstatus
                    from orders o
                    inner join customer c on o.o_custkey = c.c_custkey
                    where 'Customer#000000' || c.c_custkey like
                    'Customer#0000001'"""),
            ("TEST_05",
             """select c_custkey, 'Cliente: ' || c_name from customer
                       order by c_custkey, c_mktsegment limit 50"""),
            ("TEST_06",
             """select o.o_orderkey || c.c_name, o.o_orderstatus
                    from orders o
                    inner join customer c on o.o_custkey = c.c_custkey
                    where c.c_custkey < 10"""),
            ("TEST_07",
             """select o.o_orderkey, c.c_name ||
                    cast(c.c_custkey as VARCHAR), c.c_name || '-' ||
                    cast(c.c_custkey as VARCHAR), o.o_orderstatus
                    from orders o
                    inner join customer c on o.o_custkey = c.c_custkey
                    where c.c_custkey < 10"""),
            ("TEST_08",
             """select c.c_name || ': ' || c.c_custkey, c.c_name ||
                     ': ' || c.c_comment from customer c
                    where c.c_custkey < 10"""),
            ("TEST_09",
             """select o.o_orderkey, c.c_name || '-' ||
                     (c.c_custkey + 1), o.o_orderstatus from orders o
                    inner join customer c on o.o_custkey = c.c_custkey
                    where c.c_custkey < 20"""),
            ("TEST_10",
             """select * from (
                        select c.c_custkey, 'Customer#000000' ||
                        c.c_custkey as n_name from customer c
                        where c.c_custkey < 10
                    ) as n where n.n_name = 'Customer#000000' ||
                     n.c_custkey"""),
            ("TEST_11",
             """select c.c_custkey, c.c_name || '- ' ||
                    c.c_custkey, c.c_comment from customer c
                    where c.c_custkey < 0"""),
        )

        for fileSchemaType in data_types:
            if skip_test(dask_client, nRals, fileSchemaType, queryType):
                continue

            # Create the tables for this data type.
            cs.create_tables(bc, dir_data_file, fileSchemaType, tables=tables)

            print(separator)
            print(queryType)
            print(separator)

            # Run every query against the freshly created tables.
            for case_id, sql in concat_cases:
                runTest.run_query(bc, drill, sql, case_id, queryType, worder,
                                  "", acceptable_difference, use_percentage,
                                  fileSchemaType)

            if Settings.execution_mode == ExecutionMode.GENERATOR:
                print("==============================")
                break
示例#19
0
    def executionTest():
        """Run the ORDER BY / LIMIT queries once for every listed data type.

        For each entry of ``data_types`` that ``skip_test`` does not reject,
        the ``orders``, ``customer``, ``partsupp`` and ``lineitem`` tables are
        created with ``cs.create_tables`` and the queries below are passed,
        in order, to ``runTest.run_query``.

        Every query goes to ``drill``, except that TEST_02 and TEST_07 go to
        ``spark`` (with a ``query_spark`` variant that adds explicit
        ``nulls first/last`` ordering) when the data type is
        ``DataType.ORC``. TEST_07, TEST_10 and TEST_11 pass ``1`` as the
        ordering flag; all other queries pass ``worder`` (``0``).

        ``bc``, ``drill``, ``spark``, ``dask_client``, ``nRals``,
        ``dir_data_file`` and ``queryType`` are not defined here; they come
        from the surrounding scope.
        """
        tables = ["orders", "customer", "partsupp", "lineitem"]
        data_types = [
            DataType.DASK_CUDF,
            DataType.CUDF,
            DataType.CSV,
            DataType.ORC,
            DataType.PARQUET,
        ]  # TODO json

        # Settings shared by the run_query calls below.
        worder = 0
        use_percentage = False
        acceptable_difference = 0.01

        banner = '=============================='

        # (query id, SQL text, Spark variant used for ORC or None, ordering flag)
        limit_cases = [
            ('TEST_01',
             "select o_orderkey from orders order by 1 limit 10",
             None,
             worder),
            ('TEST_02',
             "select o_orderdate, o_orderkey, o_clerk from orders order by o_orderdate, o_orderkey, o_custkey, o_orderstatus, o_clerk limit 1000",
             "select o_orderdate, o_orderkey, o_clerk from orders order by o_orderdate nulls last, o_orderkey nulls last, o_custkey nulls last, o_orderstatus nulls last, o_clerk nulls last limit 1000",
             worder),
            ('TEST_03',
             """select o_orderkey from orders where o_custkey < 300 and o_orderdate >= '1990-08-01' 
                    order by o_orderkey limit 50""",
             None,
             worder),
            ('TEST_04',
             """select ps_partkey, ps_availqty from partsupp where ps_availqty < 3 and ps_availqty >= 1 
                    order by ps_partkey, ps_availqty limit 50""",
             None,
             worder),
            # TEST_05 is disabled:
            #   select o_orderkey, o_orderstatus from orders where o_custkey < 10 and o_orderstatus = 'O'
            #   order by o_orderkey, o_orderstatus limit 50
            ('TEST_06',
             """select orders.o_totalprice, customer.c_name from orders
                      inner join customer on orders.o_custkey = customer.c_custkey
                      order by customer.c_name, orders.o_orderkey limit 10""",
             None,
             worder),
            ('TEST_07',
             """(select l_shipdate, l_orderkey, l_linestatus from lineitem where l_linenumber = 1 order by 1, 2, 3, l_linenumber limit 10)
                    union all
                    (select l_shipdate, l_orderkey, l_linestatus from lineitem where l_linenumber = 1 order by 1 desc, 2, 3, l_linenumber limit 10)""",
             """(select l_shipdate, l_orderkey, l_linestatus from lineitem where l_linenumber = 1 order by 1 nulls last, 2 nulls last, 3 nulls last, l_linenumber nulls last limit 10)
                    union all
                    (select l_shipdate, l_orderkey, l_linestatus from lineitem where l_linenumber = 1 order by 1 desc nulls first, 2 nulls last, 3 nulls last, l_linenumber nulls last limit 10)""",
             1),
            ('TEST_08',
             "select c_custkey from customer where c_custkey < 0 order by c_custkey limit 40",
             None,
             worder),
            ('TEST_09',
             "select c_custkey, c_name from customer where c_custkey < 10 order by 1 limit 30",
             None,
             worder),
            ('TEST_10',
             "select c_custkey, c_name from customer where c_custkey < 10 limit 30",
             None,
             1),
            ('TEST_11',
             "select avg(CAST(c_custkey AS DOUBLE)), min(c_custkey) from customer limit 5",
             None,
             1),
        ]

        for fileSchemaType in data_types:
            if skip_test(dask_client, nRals, fileSchemaType, queryType):
                continue

            # Create the tables for this data type.
            cs.create_tables(bc, dir_data_file, fileSchemaType, tables=tables)

            print(banner)
            print(queryType)
            print(banner)

            for case_id, sql, spark_sql, order_flag in limit_cases:
                # Only queries that carry a Spark variant switch engines,
                # and only for ORC input.
                if spark_sql is not None and fileSchemaType == DataType.ORC:
                    runTest.run_query(
                        bc,
                        spark,
                        sql,
                        case_id,
                        queryType,
                        order_flag,
                        '',
                        acceptable_difference,
                        use_percentage,
                        fileSchemaType,
                        query_spark=spark_sql,
                    )
                else:
                    runTest.run_query(
                        bc,
                        drill,
                        sql,
                        case_id,
                        queryType,
                        order_flag,
                        '',
                        acceptable_difference,
                        use_percentage,
                        fileSchemaType,
                    )
示例#20
0
    def executionTest():
        """Run queries TEST_01..TEST_09 once for every listed data type.

        For each entry of ``data_types`` that ``skip_test`` does not reject,
        the ``customer``, ``orders``, ``nation`` and ``region`` tables are
        created with ``cs.create_tables`` and the queries below are passed,
        in order, to ``runTest.run_query``. TEST_01..TEST_07 are passed with
        ``drill``; TEST_08 and TEST_09 (the ``CHAR_LENGTH`` queries) are
        passed with ``spark``.

        ``bc``, ``drill``, ``spark``, ``dask_client``, ``nRals``,
        ``dir_data_file`` and ``queryType`` are not defined here; they come
        from the surrounding scope. When ``Settings.execution_mode`` is
        ``ExecutionMode.GENERATOR`` the loop stops after the first data type
        that was not skipped.
        """
        tables = ["customer", "orders", "nation", "region"]
        data_types = [
            DataType.DASK_CUDF, DataType.CUDF, DataType.CSV, DataType.ORC,
            DataType.PARQUET
        ]  # TODO json

        # Settings shared by every run_query call below.
        # worder tells whether the result sets must be ordered before
        # they are compared.
        worder = 1
        use_percentage = False
        acceptable_difference = 0.01

        separator = "=============================="

        # (query id, send to spark instead of drill?, SQL text)
        cases = (
            ("TEST_01", False,
             """select o_orderkey, sum(o_totalprice)/count(o_orderstatus)
                    from orders where o_custkey < 100
                    group by o_orderstatus, o_orderkey"""),
            ("TEST_02", False,
             """select o_orderkey, o_orderstatus
                from orders where o_custkey < 10
                and o_orderstatus <> 'O'
                order by o_orderkey, o_orderstatus limit 50"""),
            ("TEST_03", False,
             """select count(o_orderstatus)
                    from orders where o_orderstatus <> 'O'"""),
            ("TEST_04", False,
             """select count(o_orderkey), sum(o_orderkey), o_clerk
                    from orders where o_custkey < 1000
                    group by o_clerk, o_orderstatus"""),
            ("TEST_05", False,
             """select avg(CAST(o_orderkey AS DOUBLE))
                    from orders group by o_orderstatus"""),
            ("TEST_06", False,
             """select count(o_shippriority), sum(o_totalprice)
                    from orders group by o_shippriority"""),
            ("TEST_07", False,
             """with regionTemp as (
                        select r_regionkey, r_name
                        from region where r_regionkey > 2
                    ), nationTemp as (
                        select n_nationkey, n_regionkey as fkey, n_name
                        from nation where n_nationkey > 3
                        order by n_nationkey
                    )
                    select regionTemp.r_name, nationTemp.n_name
                    from regionTemp inner join nationTemp
                    on regionTemp.r_regionkey = nationTemp.fkey"""),
            ("TEST_08", True,
             """select c_custkey, CHAR_LENGTH(c_comment)
                    from customer where MOD(CHAR_LENGTH(c_comment), 7) = 0"""),
            ("TEST_09", True,
             "select sum(CHAR_LENGTH(c_comment)) from customer"),
        )

        for fileSchemaType in data_types:
            if skip_test(dask_client, nRals, fileSchemaType, queryType):
                continue

            # Create the tables for this data type.
            cs.create_tables(bc, dir_data_file, fileSchemaType, tables=tables)

            print(separator)
            print(queryType)

            print(separator)

            for case_id, use_spark, sql in cases:
                # The engine name is resolved only when its query is
                # reached, as in a straight-line sequence of calls.
                engine = spark if use_spark else drill
                runTest.run_query(bc, engine, sql, case_id, queryType, worder,
                                  "", acceptable_difference, use_percentage,
                                  fileSchemaType)

            if Settings.execution_mode == ExecutionMode.GENERATOR:
                print("==============================")
                break
示例#21
0
    def executionTest():
        """Create the tables for each file type and run TEST_01..TEST_20.

        Uses names from the enclosing scope (``bc``, ``drill``,
        ``dask_client``, ``nRals``, ``dir_data_file``, ``queryType``).
        Every case is forwarded to ``runTest.run_query`` with the ``drill``
        handle, in the order the cases are listed below.
        """
        tables = [
            "customer", "part", "region", "nation", "orders", "supplier",
            "partsupp"
        ]
        data_types = [DataType.JSON]

        # One entry per case: (query id, SQL text, order-by column).
        # The order-by column is "" except where a case names one.
        test_cases = [
            (
                "TEST_01",
                """select MIN(n.n_nationkey), MAX(r.r_regionkey),
                    AVG(n.n_nationkey + r.r_regionkey) from nation as n
                    left outer join region as r
                    on n.n_nationkey = r.r_regionkey""",
                "",
            ),
            (
                "TEST_02",
                """select SUM(n1.n_nationkey) as n1key,
                    AVG(n2.n_nationkey +  n1.n_nationkey ) as n2key
                    from nation as n1 full outer join nation as n2
                    on n1.n_nationkey = n2.n_nationkey + 10""",
                "",
            ),
            (
                "TEST_03",
                """select o_totalprice, o_custkey,
                    case when o_totalprice > 100000.2 then o_totalprice
                    else null end
                    from orders where o_orderkey < 20""",
                "",
            ),
            (
                "TEST_04",
                """select cast(o_orderdate AS TIMESTAMP) from orders
                    where cast(o_orderdate as TIMESTAMP)
                    between '1995-01-01' and '1995-01-05'""",
                "",
            ),
            (
                "TEST_05",
                """ WITH
                t1_l AS ( SELECT * FROM orders ),
                t1_r AS ( SELECT * FROM customer ),
                main_lr AS(
                    SELECT
                        COALESCE(o.o_comment, c.c_comment) AS info
                    FROM
                        t1_l o FULL JOIN t1_r c
                        ON  o.o_custkey = c.c_custkey
                        AND o.o_orderkey = c.c_nationkey
                ) SELECT * FROM main_lr
                """,
                "",
            ),
            (
                "TEST_06",
                """select o.o_orderkey, c.c_name ||
                    cast(c.c_custkey as VARCHAR), c.c_name || '-' ||
                    cast(c.c_custkey as VARCHAR), o.o_orderstatus
                    from orders o
                    inner join customer c on o.o_custkey = c.c_custkey
                    where c.c_custkey < 10""",
                "",
            ),
            (
                "TEST_07",
                """select n1.n_regionkey, n2.n_nationkey,
                    MIN(n1.n_regionkey), MAX(n1.n_regionkey),
                    AVG(n2.n_nationkey)
                    from nation as n1 full outer join nation as n2
                    on n1.n_nationkey = n2.n_nationkey + 6
                    GROUP BY n1.n_regionkey, n2.n_nationkey""",
                "",
            ),
            (
                "TEST_08",
                """SELECT c_custkey, count(c_nationkey),
                    min(c_nationkey), sum(c_nationkey)
                    from customer group by c_custkey""",
                "",
            ),
            (
                "TEST_09",
                """select o_orderkey as okey, o_custkey as ckey,
                    (EXTRACT(YEAR FROM o_orderdate) - 5) from orders
                    where o_orderstatus = 'O' order by okey""",
                "",
            ),
            (
                "TEST_10",
                """select n1.n_nationkey as n1key,
                        n2.n_nationkey as n2key,
                        n1.n_nationkey + n2.n_nationkey
                    from nation as n1
                    full outer join nation as n2
                    on n1.n_nationkey = n2.n_nationkey + 6
                    where n1.n_nationkey < 10 and n1.n_nationkey > 5""",
                "",
            ),
            (
                "TEST_11",
                """select n1.n_nationkey as n1key,
                        n2.n_nationkey as n2key,
                        n1.n_nationkey + n2.n_nationkey
                    from nation as n1
                    full outer join nation as n2
                    on n1.n_nationkey = n2.n_nationkey + 6
                    and n1.n_nationkey + 1 = n2.n_nationkey + 7
                    and n1.n_nationkey + 2 = n2.n_nationkey + 8""",
                "",
            ),
            (
                "TEST_12",
                """select count(c_custkey) + sum(c_acctbal) +
                        avg(c_acctbal), min(c_custkey) - max(c_nationkey),
                        c_nationkey * 2 as key
                    from customer where  c_nationkey * 2 < 40
                    group by  c_nationkey * 2""",
                "",
            ),
            (
                "TEST_13",
                """select c.c_custkey, r.r_regionkey,
                    c.c_custkey + r.r_regionkey as addy from customer as c
                    inner join region as r on c.c_nationkey = r.r_regionkey
                    where c.c_acctbal < 1000 group by r.r_regionkey,
                    c.c_custkey order by c.c_custkey desc""",
                "",
            ),
            (
                "TEST_14",
                """SELECT n.n_nationkey + 1, n.n_regionkey from nation
                    AS n INNER JOIN region AS r
                    ON n.n_regionkey = r.r_regionkey
                    and n.n_nationkey = 5""",
                "",
            ),
            (
                "TEST_15",
                """select n.n_nationkey, r.r_regionkey
                    from nation as n left outer join region as r
                    on n.n_regionkey = r.r_regionkey
                    where n.n_nationkey < 10
                    and n.n_nationkey > 5""",
                "n_nationkey",
            ),
            (
                "TEST_16",
                """select partSuppTemp.partKey, partAnalysis.avgSize
                    from
                    (
                        select min(p_partkey) as partKey,
                        avg(CAST(p_size AS DOUBLE)) as avgSize,
                        max(p_retailprice) as maxPrice,
                        min(p_retailprice) as minPrice from part
                    ) as partAnalysis
                    inner join
                    (
                        select ps_partkey as partKey, ps_suppkey as suppKey
                        from partsupp where ps_availqty > 2
                    ) as partSuppTemp
                    on partAnalysis.partKey = partSuppTemp.partKey
                    inner join
                    (
                        select s_suppkey as suppKey from supplier
                    ) as supplierTemp
                    on supplierTemp.suppKey = partSuppTemp.suppKey""",
                "",
            ),
            (
                "TEST_17",
                """ select p.p_brand, p.p_type, p.p_size,
                            count(ps.ps_suppkey) as supplier_cnt
                        from partsupp ps
                        inner join part p on p.p_partkey = ps.ps_partkey
                        where
                            p.p_brand <> 'Brand#45'
                            and p.p_size in (49, 14, 23, 45, 19, 3, 36, 9)
                            and ps.ps_supplycost < p.p_retailprice
                        group by
                            p.p_brand, p.p_type, p.p_size
                        order by
                            supplier_cnt desc, p.p_brand, p.p_type, p.p_size""",
                "",
            ),
            (
                "TEST_18",
                """select c_custkey + c_nationkey, c_acctbal
                    from customer order by 1 desc, 2""",
                "",
            ),
            (
                "TEST_19",
                """(select o_orderkey, o_custkey from orders
                        where o_orderkey < 100
                    )
                    union all
                    (select o_orderkey, o_custkey from orders
                        where o_orderkey < 300
                        and o_orderkey >= 200) order by 2""",
                "o_orderkey",
            ),
            (
                "TEST_20",
                """select avg(CAST(c_custkey AS DOUBLE)), min(c_custkey)
                    from customer limit 5""",
                "",
            ),
        ]

        banner = "=============================="

        for fileSchemaType in data_types:
            # Skip file types that skip_test rejects for this setup.
            if skip_test(dask_client, nRals, fileSchemaType, queryType):
                continue
            cs.create_tables(bc, dir_data_file, fileSchemaType, tables=tables)

            # Flag indicating whether the result sets must be ordered
            # before they are compared.
            worder = 1
            use_percentage = False
            acceptable_difference = 0.01

            print(banner)
            print(queryType)
            print(banner)

            for queryId, query, order_by_col in test_cases:
                runTest.run_query(
                    bc,
                    drill,
                    query,
                    queryId,
                    queryType,
                    worder,
                    order_by_col,
                    acceptable_difference,
                    use_percentage,
                    fileSchemaType,
                )

            # In generator mode a single pass over one file type is enough.
            if Settings.execution_mode == ExecutionMode.GENERATOR:
                print(banner)
                break
示例#22
0
    def executionTest(queryType):
        """Create the tables for each file type and run TEST_01..TEST_18.

        Args:
            queryType: label printed before the run and forwarded to
                ``skip_test`` and to every ``runTest.run_query`` call.

        ``bc``, ``drill``, ``dask_client``, ``nRals`` and ``dir_data_file``
        come from the enclosing scope.
        """
        tables = ["nation", "region", "customer", "orders", "lineitem"]
        # TODO json
        data_types = [
            DataType.DASK_CUDF,
            DataType.CUDF,
            DataType.CSV,
            DataType.ORC,
            DataType.PARQUET,
        ]  # TODO json

        # (query id, SQL text) pairs, executed top to bottom. Every case
        # passes "" as the order-by column of run_query.
        coalesce_cases = (
            (
                "TEST_01",
                """select n.n_nationkey, COALESCE(r.r_regionkey,-1)
                    from nation as n left outer join region as r
                    on n.n_nationkey = r.r_regionkey
                    where n.n_nationkey < 10""",
            ),
            (
                "TEST_02",
                """select COALESCE(orders.o_orderkey, 100),
                    COALESCE(orders.o_totalprice, 0.01) from customer
                    left outer join orders
                    on customer.c_custkey = orders.o_custkey
                    where customer.c_nationkey = 3
                    and customer.c_custkey < 500""",
            ),
            (
                "TEST_03",
                """select COALESCE(orders.o_orderkey, customer.c_custkey),
                    COALESCE(orders.o_totalprice, customer.c_acctbal)
                    from customer left outer join orders
                    on customer.c_custkey = orders.o_custkey
                    where customer.c_nationkey = 3
                    and customer.c_custkey < 500""",
            ),
            (
                "TEST_04",
                """select customer.c_custkey, orders.o_orderkey,
                    COALESCE(orders.o_custkey,123456) from customer
                    left outer join orders
                    on customer.c_custkey = orders.o_custkey
                    where customer.c_nationkey = 3
                    and customer.c_custkey < 500""",
            ),
            (
                "TEST_05",
                """select COUNT(DISTINCT(COALESCE(n1.n_regionkey, 32))),
                    AVG(CAST(COALESCE(n1.n_regionkey, 32) as float)) from nation as n1
                    full outer join nation as n2
                    on n1.n_nationkey = n2.n_nationkey + 6""",
            ),
            (
                "TEST_06",
                """select SUM(COALESCE(n2.n_nationkey, 100)),
                    COUNT(DISTINCT(COALESCE(n1.n_nationkey,32))),
                    n2.n_regionkey as n1key from nation as n1
                    full outer join nation as n2
                    on n1.n_nationkey = n2.n_nationkey + 6
                    GROUP BY n2.n_regionkey""",
            ),
            (
                "TEST_07",
                """select MIN(COALESCE(n.n_nationkey, r.r_regionkey)),
                    MAX(COALESCE(n.n_nationkey, 8)) from nation as n
                    left outer join region as r
                    on n.n_nationkey = r.r_regionkey""",
            ),
            (
                "TEST_08",
                """select AVG(CAST(COALESCE(n.n_nationkey,
                    r.r_regionkey) AS DOUBLE)),
                    MAX(COALESCE(n.n_nationkey, 8)),
                    COUNT(COALESCE(n.n_nationkey, 12)), n.n_nationkey
                    from nation as n left outer join region as r
                    on n.n_nationkey = r.r_regionkey
                    GROUP BY n.n_nationkey""",
            ),
            (
                "TEST_09",
                """select SUM(COALESCE(n2.n_nationkey, 100)),
                  COUNT(DISTINCT(COALESCE(n1.n_nationkey,32))),
                  COALESCE(n2.n_regionkey, 100) as n1key from nation as n1
                  full outer join nation as n2
                  on n1.n_nationkey = n2.n_nationkey + 6
                  GROUP BY COALESCE(n2.n_regionkey, 100)""",
            ),
            (
                "TEST_10",
                "SELECT COALESCE(l_shipinstruct, l_comment) FROM lineitem",
            ),
            (
                "TEST_11",
                """select n.n_nationkey,
                    COALESCE(r.r_comment, n.n_comment) from nation as n
                    left outer join region as r
                    on n.n_nationkey = r.r_regionkey""",
            ),
            (
                "TEST_12",
                """SELECT COALESCE(l.l_shipinstruct, o.o_orderstatus)
                    FROM lineitem l inner join orders o
                    on l.l_orderkey = o.o_orderkey
                    where o.o_totalprice < 1574.23""",
            ),
            (
                "TEST_13",
                """ WITH
                t1_l AS ( SELECT * FROM orders ),
                t1_r AS ( SELECT * FROM customer ),
                main_lr AS(
                    SELECT
                        COALESCE(o.o_comment, c.c_comment) AS info
                    FROM
                        t1_l o FULL JOIN t1_r c
                        ON  o.o_custkey = c.c_custkey
                        AND o.o_orderkey = c.c_nationkey
                ) SELECT * FROM main_lr
                """,
            ),
            (
                "TEST_14",
                """
            WITH
            ltable3 AS (
                select lineitem.l_orderkey as orderkey,
                        lineitem.l_linestatus as linestatus
                from lineitem
                where mod(lineitem.l_orderkey, 2) = 0
            ),

            rtable1 AS (
                select lineitem.l_orderkey as orderkey,
                        lineitem.l_linestatus as linestatus
                from lineitem
                where mod(lineitem.l_partkey, 6) = 0
            ),
            rtable2 AS (
                select lineitem.l_orderkey as orderkey,
                        lineitem.l_linestatus as linestatus
                from lineitem
                where mod(lineitem.l_suppkey, 4) = 0
            ),
            rtable3 AS (
                select coalesce(l.orderkey, r.orderkey)  as orderkey,
                        coalesce(l.linestatus, r.linestatus) as linestatus
                from rtable1 l full join rtable2 r
                on l.orderkey = r.orderkey
                -- and l.linestatus = r.linestatus
            ),

            lastjoin AS (
                select l.orderkey,
                        coalesce(l.linestatus, r.linestatus) as linestatus
                from ltable3 l full join rtable3 r
                on l.orderkey = r.orderkey
                and l.linestatus = r.linestatus
            )

            select * from lastjoin
            """,
            ),
            (
                "TEST_15",
                """select n.n_nationkey, COALESCE(r.r_regionkey,-1)
                    from nation as n right outer join region as r
                    on n.n_nationkey = r.r_regionkey
                    where n.n_nationkey < 10""",
            ),
            (
                "TEST_16",
                """select COALESCE(orders.o_orderkey, 100),
                    COALESCE(orders.o_totalprice, 0.01) from customer
                    right outer join orders
                    on customer.c_custkey = orders.o_custkey
                    where customer.c_nationkey = 3
                    and customer.c_custkey < 500""",
            ),
            (
                "TEST_17",
                """select MIN(COALESCE(n.n_nationkey, r.r_regionkey)),
                    MAX(COALESCE(n.n_nationkey, 8)) from nation as n
                    right outer join region as r
                    on n.n_nationkey = r.r_regionkey""",
            ),
            (
                "TEST_18",
                """select AVG(CAST(COALESCE(n.n_nationkey,
                    r.r_regionkey) AS DOUBLE)),
                    MAX(COALESCE(n.n_nationkey, 8)),
                    COUNT(COALESCE(n.n_nationkey, 12)), n.n_nationkey
                    from nation as n right outer join region as r
                    on n.n_nationkey = r.r_regionkey
                    GROUP BY n.n_nationkey""",
            ),
        )

        separator = "=============================="

        for fileSchemaType in data_types:
            # Skip file types that skip_test rejects for this setup.
            if skip_test(dask_client, nRals, fileSchemaType, queryType):
                continue
            cs.create_tables(bc, dir_data_file, fileSchemaType, tables=tables)

            # Flag indicating whether the result sets must be ordered
            # before they are compared.
            worder = 1
            use_percentage = False
            acceptable_difference = 0.01

            print(separator)
            print(queryType)
            print(separator)

            for queryId, query in coalesce_cases:
                runTest.run_query(
                    bc,
                    drill,
                    query,
                    queryId,
                    queryType,
                    worder,
                    "",
                    acceptable_difference,
                    use_percentage,
                    fileSchemaType,
                )

            # In generator mode a single pass over one file type is enough.
            if Settings.execution_mode == ExecutionMode.GENERATOR:
                print(separator)
                break
示例#23
0
    def executionTest():

        tables = ["lineitem", "orders", "nation", "region"]
        data_types = [
            DataType.DASK_CUDF,
            DataType.CUDF,
            DataType.CSV,
            DataType.PARQUET,
        ]  # TODO json

        for fileSchemaType in data_types:
            if skip_test(dask_client, nRals, fileSchemaType, queryType):
                continue
            cs.create_tables(bc, dir_data_file, fileSchemaType, tables=tables)

            # Run Query ------------------------------------------------------
            # Parameter to indicate if its necessary to order
            # the resulsets before compare them
            worder = 1
            use_percentage = False
            acceptable_difference = 0.01

            print("==============================")
            print(queryType)
            print("==============================")

            queryId = "TEST_01"
            query = """select o_orderkey, DAYOFWEEK(o_orderdate) as day_of_week
                    from orders where o_orderkey < 250 order by o_orderkey"""
            runTest.run_query(
                bc,
                spark,
                query,
                queryId,
                queryType,
                worder,
                "",
                acceptable_difference,
                use_percentage,
                fileSchemaType,
            )

            queryId = "TEST_02"
            query = """select o_orderkey, o_totalprice, DAYOFWEEK(o_orderdate) as day_of_week
                    from orders where o_orderkey < 1850 and DAYOFWEEK(o_orderdate) = 6
                    order by o_orderkey"""
            runTest.run_query(
                bc,
                spark,
                query,
                queryId,
                queryType,
                worder,
                "",
                acceptable_difference,
                use_percentage,
                fileSchemaType,
            )

            queryId = "TEST_03"
            query = """select o_orderkey, case when DAYOFWEEK(o_orderdate) = 6
                    OR DAYOFWEEK(o_orderdate) = 7 then 'Weekend'
                    else 'Weekday' end as day_of_week
                    from orders where o_orderkey > 5450 order by o_orderkey"""
            runTest.run_query(
                bc,
                spark,
                query,
                queryId,
                queryType,
                worder,
                "",
                acceptable_difference,
                use_percentage,
                fileSchemaType,
            )

            queryId = "TEST_04"
            query = """ with dayofweektable as (
                        select o_orderkey, DAYOFWEEK(o_orderdate) as num_of_week from orders
                    )
                    select o_orderkey, num_of_week, 
                        case when num_of_week = 1 then 'Mon'
                        when num_of_week = 2 then 'Tue'
                        when num_of_week = 3 then 'Wed'
                        when num_of_week = 4 then 'Thu'
                        when num_of_week = 5 then 'Fri'
                        when num_of_week = 6 then 'Sat'
                        else 'Sun' end as day_of_week
                    from dayofweektable order by o_orderkey limit 100"""
            runTest.run_query(
                bc,
                spark,
                query,
                queryId,
                queryType,
                worder,
                "",
                acceptable_difference,
                use_percentage,
                fileSchemaType,
            )

            queryId = "TEST_05"
            query = """with ordersdaystable as (
                        select o_orderkey as key, DAYOFWEEK(o_orderdate) as num_of_week from orders
                    ), lineitemdaystable as (
                        select l_orderkey as key, DAYOFWEEK(l_shipdate) as num_of_week from lineitem
                    )
                    select 'Saturday' as day_, count(o.num_of_week) as n_days
                    from ordersdaystable as o
                    inner join lineitemdaystable as l 
                    ON o.key = l.key
                    where l.num_of_week = 6
                    """
            runTest.run_query(
                bc,
                spark,
                query,
                queryId,
                queryType,
                worder,
                "",
                acceptable_difference,
                use_percentage,
                fileSchemaType,
            )

            queryId = "TEST_06"
            query = """with ordersperutable as (
                        select o_orderkey, DAYOFWEEK(o_orderdate) as num_of_week, n_name as country
                        from orders
                        inner join nation on DAYOFWEEK(o_orderdate) = n_nationkey
                        where n_name in ('PERU', 'ARGENTINA', 'BRAZIL', 'UNITED STATES') 
                    ), lineitemamericatable as (
                        select l_orderkey, DAYOFWEEK(l_shipdate) as num_of_week, r_name as region
                        from lineitem
                        inner join region on DAYOFWEEK(l_shipdate) = r_regionkey
                        where r_name = 'AMERICA'
                    )
                    select o_orderkey, o.num_of_week as num_day_o, 
                    case when o.num_of_week = 1 then 'Mon'
                        when o.num_of_week = 2 then 'Tue'
                        when o.num_of_week = 3 then 'Wed'
                        when o.num_of_week = 4 then 'Thu'
                        when o.num_of_week = 5 then 'Fri'
                        when o.num_of_week = 6 then 'Sat'
                        else 'Sun' end as day_of_week
                    from ordersperutable as o
                    inner join lineitemamericatable as l 
                    ON o_orderkey = l_orderkey
                    where o.num_of_week <> 7
                    and l.num_of_week <> 7
                    order by o_orderkey, o.num_of_week
                    limit 75"""
            runTest.run_query(
                bc,
                spark,
                query,
                queryId,
                queryType,
                worder,
                "",
                acceptable_difference,
                use_percentage,
                fileSchemaType,
            )

            if Settings.execution_mode == ExecutionMode.GENERATOR:
                print("==============================")
                break
示例#24
0
    def executionTest():
        """Run the unary-operation queries for every listed data type.

        For each entry of ``data_types`` that ``skip_test`` does not reject,
        the ``customer`` and ``nation`` tables are created through
        ``cs.create_tables`` and every query below is passed to
        ``runTest.run_query`` with ``drill`` as the engine argument.
        When ``Settings.execution_mode`` equals ``ExecutionMode.GENERATOR``
        the data-type loop ends after the first data type that was not
        skipped.
        """
        tables = ["customer", "nation"]
        # TODO json
        data_types = [
            DataType.DASK_CUDF,
            DataType.CUDF,
            DataType.CSV,
            DataType.ORC,
            DataType.PARQUET,
        ]

        # (query id, SQL text) pairs, executed in the order listed.
        test_cases = (
            (
                "TEST_01",
                """select sin(c_acctbal), cos(c_acctbal), asin(c_acctbal),
                        acos(c_acctbal), ln(c_acctbal), tan(c_acctbal),
                        atan(c_acctbal), floor(c_acctbal), ceil(c_acctbal),
                        c_acctbal
                    from customer""",
            ),
            (
                "TEST_02",
                """select sin(c_acctbal), cos(c_acctbal),
                        asin(c_acctbal), acos(c_acctbal), c_acctbal
                    from customer""",
            ),
            (
                "TEST_03",
                """select sin(c_acctbal), cos(c_acctbal),
                        asin(c_acctbal), acos(c_acctbal),
                        ln(c_acctbal), c_acctbal
                    from customer""",
            ),
            (
                "TEST_04",
                """select sin(c_acctbal), cos(c_acctbal),
                        asin(c_acctbal), acos(c_acctbal),
                        ln(c_acctbal), tan(c_acctbal),
                        atan(c_acctbal), c_acctbal
                    from customer""",
            ),
            (
                "TEST_05",
                """select sin(c_acctbal), cos(c_acctbal),
                        asin(c_acctbal), acos(c_acctbal),
                        ln(c_acctbal), tan(c_acctbal), atan(c_acctbal),
                        floor(c_acctbal), c_acctbal
                    from customer""",
            ),
            ("TEST_06", "select floor(c_acctbal), c_acctbal from customer"),
            # The next two are not classified as unaryOp (-) queries,
            # but they still need to be covered.
            ("TEST_07", "select n_nationkey, -n_nationkey from nation"),
            ("TEST_08", "select -(cast(n_nationkey as double)) from nation"),
        )

        banner = "=============================="

        # Create Tables -----------------------------------------------------
        for schema_type in data_types:
            if skip_test(dask_client, nRals, schema_type, queryType):
                continue
            cs.create_tables(bc, dir_data_file, schema_type, tables=tables)

            # Run Query ------------------------------------------------------
            # Flag indicating whether the result sets have to be ordered
            # before they are compared.
            order_flag = 1
            as_percentage = False
            tolerance = 0.01

            print(banner)
            print(queryType)
            print(banner)

            for query_id, sql in test_cases:
                runTest.run_query(
                    bc,
                    drill,
                    sql,
                    query_id,
                    queryType,
                    order_flag,
                    "",
                    tolerance,
                    as_percentage,
                    schema_type,
                )

            if Settings.execution_mode == ExecutionMode.GENERATOR:
                print(banner)
                break
示例#25
0
    def executionTest():
        """Run the right-outer-join queries for every listed data type.

        For each entry of ``data_types`` that ``skip_test`` does not reject,
        the tables in ``tables`` are created through ``cs.create_tables`` and
        every case below is passed to ``runTest.run_query`` with ``drill`` as
        the engine argument. Each case carries its own order-by column and
        acceptable difference. When ``Settings.execution_mode`` equals
        ``ExecutionMode.GENERATOR`` the data-type loop ends after the first
        data type that was not skipped.
        """
        tables = ["nation", "region", "orders", "lineitem"]
        # TODO json
        data_types = [
            DataType.DASK_CUDF,
            DataType.CUDF,
            DataType.CSV,
            DataType.ORC,
            DataType.PARQUET,
        ]

        # Flag indicating whether the result sets have to be ordered
        # before they are compared.
        order_flag = 1
        as_percentage = False
        default_tolerance = 0

        # (query id, order-by column, acceptable difference, SQL text),
        # executed in the order listed.
        test_cases = (
            (
                "TEST_01",
                "n_nationkey",
                default_tolerance,
                """select n.n_nationkey, r.r_regionkey
                    from nation as n right outer join region as r
                    on n.n_nationkey = r.r_regionkey
                    where n.n_nationkey < 10""",
            ),
            (
                "TEST_02",
                "n_nationkey",
                default_tolerance,
                """select n.n_nationkey, r.r_regionkey,
                        n.n_nationkey + r.r_regionkey
                    from nation as n right outer join region as r
                    on n.n_nationkey = r.r_regionkey
                    where n.n_nationkey < 10""",
            ),
            (
                "TEST_03",
                "n_nationkey",
                default_tolerance,
                """select n.n_nationkey, r.r_regionkey
                    from nation as n right outer join region as r
                    on n.n_regionkey = r.r_regionkey
                    where n.n_nationkey < 10""",
            ),
            (
                "TEST_04",
                "n_nationkey",
                default_tolerance,
                """select n.n_nationkey, r.r_regionkey
                    from nation as n right outer join region as r
                    on n.n_regionkey = r.r_regionkey
                    where n.n_nationkey < 10
                    and n.n_nationkey > 5""",
            ),
            (
                # This case passes no order-by column and a looser tolerance.
                "TEST_05",
                "",
                0.01,
                """select l.l_orderkey, l.l_partkey, l.l_quantity, o.o_totalprice, o.o_clerk
                    from lineitem as l right outer join orders as o
                    on l.l_orderkey = o.o_orderkey
                    where o.o_totalprice < 87523.2
                    and l.l_returnflag in ('A', 'R')
                    order by o.o_totalprice""",
            ),
        )

        banner = "=============================="

        # Create Tables -----------------------------------------------------
        for schema_type in data_types:
            if skip_test(dask_client, nRals, schema_type, queryType):
                continue
            cs.create_tables(bc, dir_data_file, schema_type, tables=tables)

            # Run Query ------------------------------------------------------
            print(banner)
            print(queryType)
            print(banner)

            for query_id, order_column, tolerance, sql in test_cases:
                runTest.run_query(
                    bc,
                    drill,
                    sql,
                    query_id,
                    queryType,
                    order_flag,
                    order_column,
                    tolerance,
                    as_percentage,
                    schema_type,
                )

            if Settings.execution_mode == ExecutionMode.GENERATOR:
                print(banner)
                break
示例#26
0
    def executionTest():
        """Run the outer-join aggregate/filter queries for every data type.

        For each entry of ``data_types`` that ``skip_test`` does not reject,
        the tables in ``tables`` are created through ``cs.create_tables`` and
        every case below is passed to ``runTest.run_query`` with ``drill`` as
        the engine argument. A case may override the acceptable difference
        and may add keyword arguments (``print_result=True`` for TEST_01 and
        TEST_05). When ``Settings.execution_mode`` equals
        ``ExecutionMode.GENERATOR`` the data-type loop ends after the first
        data type that was not skipped.
        """
        tables = [
            "nation", "region", "customer", "lineitem", "orders", "supplier"
        ]
        # TODO json
        data_types = [
            DataType.DASK_CUDF,
            DataType.CUDF,
            DataType.CSV,
            DataType.ORC,
            DataType.PARQUET,
        ]

        # Flag indicating whether the result sets have to be ordered
        # before they are compared.
        order_flag = 1
        as_percentage = False
        default_tolerance = 0

        # (query id, acceptable difference, extra keyword arguments, SQL text),
        # executed in the order listed.
        test_cases = (
            (
                "TEST_01",
                default_tolerance,
                {"print_result": True},
                """select MIN(n.n_nationkey), MAX(r.r_regionkey),
                    AVG(CAST((n.n_nationkey + r.r_regionkey) AS DOUBLE))
                    from nation as n
                    left outer join region as r
                    on n.n_nationkey = r.r_regionkey
                    where n.n_nationkey IS NULL""",
            ),
            (
                "TEST_02",
                0.01,
                {},
                """select SUM(n1.n_nationkey) as n1key,
                    AVG(CAST((n2.n_nationkey +  n1.n_nationkey) AS DOUBLE))
                    as n2key
                    from nation as n1
                    full outer join nation as n2
                    on n1.n_nationkey = n2.n_nationkey + 10
                    where n1.n_nationkey IS NOT NULL""",
            ),
            (
                "TEST_03",
                default_tolerance,
                {},
                """select COUNT(n1.n_nationkey) as n1key,
                    COUNT(n2.n_nationkey +  n1.n_nationkey) as n2key
                    from nation as n1 full outer join nation as n2
                    on n1.n_nationkey = n2.n_nationkey + 10
                    where n1.n_nationkey IS NOT NULL""",
            ),
            (
                "TEST_04",
                0.01,
                {},
                """select COUNT(n1.n_regionkey),
                        AVG(CAST(n1.n_regionkey AS DOUBLE))
                    from nation as n1 full outer join nation as n2
                    on n1.n_nationkey = n2.n_nationkey + 6
                    WHERE n1.n_regionkey IS NOT NULL""",
            ),
            (
                "TEST_05",
                default_tolerance,
                {"print_result": True},
                """select MIN(n.n_nationkey), MAX(n.n_nationkey)
                    from nation as n left outer join region as r
                    on n.n_nationkey = r.r_regionkey
                    WHERE n.n_nationkey IS NULL""",
            ),
            # TEST_06 is disabled; it was written as:
            # query = """select COUNT(n.n_nationkey), AVG(r.r_regionkey)
            # from nation as n left outer join region as r
            # on n.n_nationkey = r.r_regionkey
            # WHERE n.n_regionkey IS NULL"""
            # and called run_query with print_result = True.
            (
                "TEST_07",
                default_tolerance,
                {},
                """select n.n_nationkey, n.n_name, r.r_regionkey,
                        r.r_name
                    from nation as n left outer join region as r
                    on n.n_nationkey = r.r_regionkey
                    WHERE r.r_name IS NULL""",
            ),
            (
                "TEST_08",
                default_tolerance,
                {},
                """select n.n_nationkey, n.n_name, r.r_regionkey,
                        r.r_name
                    from nation as n left outer join region as r
                    on n.n_nationkey = r.r_regionkey
                    WHERE n.n_name IS NOT NULL""",
            ),
        )

        banner = "=============================="

        # Create Tables -----------------------------------------------------
        for schema_type in data_types:
            if skip_test(dask_client, nRals, schema_type, queryType):
                continue
            cs.create_tables(bc, dir_data_file, schema_type, tables=tables)

            # Run Query ------------------------------------------------------
            print(banner)
            print(queryType)
            print(banner)

            for query_id, tolerance, extra_kwargs, sql in test_cases:
                runTest.run_query(
                    bc,
                    drill,
                    sql,
                    query_id,
                    queryType,
                    order_flag,
                    "",
                    tolerance,
                    as_percentage,
                    schema_type,
                    **extra_kwargs
                )

            if Settings.execution_mode == ExecutionMode.GENERATOR:
                print(banner)
                break
示例#27
0
    def executionTest():
        """Run the date-extraction and join queries for every data type.

        For each entry of ``data_types`` that ``skip_test`` does not reject,
        the tables in ``tables`` are created through ``cs.create_tables`` and
        every case below is passed to ``runTest.run_query``. TEST_01 and
        TEST_03 pass ``spark`` as the engine argument when the data type is
        ``DataType.ORC`` and ``drill`` otherwise; all other cases always pass
        ``drill``.
        """
        tables = ["nation", "region", "customer", "orders", "lineitem"]
        # TODO json
        data_types = [
            DataType.DASK_CUDF,
            DataType.CUDF,
            DataType.CSV,
            DataType.ORC,
            DataType.PARQUET,
        ]

        # Flag indicating whether the result sets have to be ordered
        # before they are compared.
        order_flag = 1
        as_percentage = False
        tolerance = 0.01

        # (query id, use spark for ORC input?, SQL text), executed in order.
        test_cases = (
            (
                'TEST_01',
                True,
                """select EXTRACT(YEAR FROM l_receiptdate) - EXTRACT(YEAR FROM l_shipdate) as years_late, 
            EXTRACT(MONTH FROM l_receiptdate) - EXTRACT(MONTH FROM l_shipdate) as months_late, 
            EXTRACT(DAY FROM l_receiptdate) - EXTRACT(DAY FROM l_shipdate) as days_late 
            from lineitem where l_shipdate < DATE '1993-01-01'""",
            ),
            (
                'TEST_02',
                False,
                "select o_orderkey as okey, o_custkey as ckey, (EXTRACT(YEAR FROM o_orderdate) - 5) from orders where o_orderstatus = 'O' order by okey",
            ),
            (
                'TEST_03',
                True,
                """select orders.o_orderkey, orders.o_orderdate, orders.o_orderstatus 
            from orders inner join lineitem on lineitem.l_orderkey = orders.o_orderkey
            where orders.o_orderkey < 30 and lineitem.l_orderkey < 20
            order by orders.o_orderkey, lineitem.l_linenumber, orders.o_custkey, lineitem.l_orderkey""",
            ),
            (
                'TEST_04',
                False,
                """select customer.c_nationkey, customer.c_name, orders.o_orderdate, lineitem.l_receiptdate 
            from customer left outer join orders on customer.c_custkey = orders.o_custkey
            inner join lineitem on lineitem.l_orderkey = orders.o_orderkey
            where customer.c_nationkey = 3 and customer.c_custkey < 100 and orders.o_orderdate < '1990-01-01'
            order by orders.o_orderkey, lineitem.l_linenumber""",
            ),
            (
                'TEST_05',
                False,
                """select orders.o_orderkey, orders.o_orderdate, lineitem.l_receiptdate, orders.o_orderstatus 
            from orders inner join lineitem on lineitem.l_receiptdate = orders.o_orderdate
            where orders.o_orderkey < 30 and lineitem.l_orderkey < 20
            order by orders.o_orderkey, lineitem.l_linenumber""",
            ),
        )

        banner = '=============================='

        # Create Tables ------------------------------------------------------
        for schema_type in data_types:
            if skip_test(dask_client, nRals, schema_type, queryType):
                continue
            cs.create_tables(bc, dir_data_file, schema_type, tables=tables)

            # Run Query ------------------------------------------------------
            print(banner)
            print(queryType)
            print(banner)

            for query_id, orc_uses_spark, sql in test_cases:
                # Only the flagged cases switch engines, and only for ORC.
                if orc_uses_spark and schema_type == DataType.ORC:
                    engine = spark
                else:
                    engine = drill
                runTest.run_query(
                    bc,
                    engine,
                    sql,
                    query_id,
                    queryType,
                    order_flag,
                    '',
                    tolerance,
                    as_percentage,
                    schema_type,
                )
示例#28
0
    def executionTest():
        """Run the TPCH smoke queries against tables read from HDFS.

        Calls ``bc.hdfs`` with the hard-coded host, port, driver and the
        absolute path of the krb ticket file. If the returned status compares
        equal to ``False``, two warnings are printed and the function returns
        without running any query. Otherwise, for each entry of
        ``data_types`` that ``skip_test`` does not reject, the tables are
        created from the ``hdfs://`` location and every case below is passed
        to ``runTest.run_query`` with ``drill`` as the engine argument.
        """
        # Read Data TPCH -----------------------------------------------------
        authority = 'hdfsdisk'
        krbticket = os.path.abspath('../KrbHDFS/myconf/krb5cc_0')
        hdfs_host = '172.22.0.3'
        hdfs_port = 9000
        hdfs_driver = 'libhdfs'
        print("Using krb ticket: " + krbticket)

        connected, error_msg, fs = bc.hdfs(
            authority,
            host=hdfs_host,
            port=hdfs_port,
            user='******',
            driver=hdfs_driver,
            kerb_ticket=krbticket,
        )

        # Equality test (not truthiness) is kept deliberately: only a status
        # that compares equal to False aborts the run.
        if connected == False:
            print(
                "WARNING: Could not connect to HDFS instance %s:%d using driver %s, error was: %s"
                % (hdfs_host, hdfs_port, hdfs_driver, error_msg)
            )
            print("WARNING: Will ignore " + queryType)
            return

        print("Success connection to HDFS:")
        print(fs)

        hdfs_dir_data_lc = "hdfs://" + authority + dir_data_lc
        print("TPCH files at: " + hdfs_dir_data_lc)

        tables = [
            'nation', 'region', 'supplier', 'customer', 'lineitem', 'orders'
        ]
        # TODO json (the original note also listed parquet, which is
        # already part of the list)
        data_types = [DataType.CSV, DataType.ORC, DataType.PARQUET]

        # Flag indicating whether the result sets have to be ordered
        # before they are compared.
        ordered = 1
        unordered = 0
        tolerance = 0.01

        # (query id, order flag, use percentage?, SQL text), executed in order.
        test_cases = (
            (
                'TEST_01', ordered, False,
                "select count(c_custkey) as c1, count(c_acctbal) as c2 from customer",
            ),
            (
                'TEST_02', ordered, False,
                "select count(n_nationkey), count(n_regionkey) from nation",
            ),
            (
                'TEST_03', ordered, False,
                "select count(s_suppkey), count(s_nationkey) from supplier",
            ),
            (
                # TODO: Change sum/count for avg KC
                'TEST_04', ordered, True,
                "select count(c_custkey), sum(c_acctbal), sum(c_acctbal)/count(c_acctbal), min(c_custkey), max(c_nationkey), (max(c_nationkey) + min(c_nationkey))/2 c_nationkey from customer where c_custkey < 100 group by c_nationkey",
            ),
            (
                'TEST_05', ordered, False,
                "select c.c_custkey, c.c_nationkey, n.n_regionkey from customer as c inner join nation as n on c.c_nationkey = n.n_nationkey where n.n_regionkey = 1 and c.c_custkey < 50",
            ),
            (
                'TEST_06', unordered, False,
                "select c_custkey, c_nationkey, c_acctbal from customer order by c_nationkey, c_custkey, c_acctbal",
            ),
            (
                'TEST_07', unordered, False,
                "select c_custkey + c_nationkey, c_acctbal from customer order by 1, 2",
            ),
            (
                'TEST_08', ordered, False,
                "select n1.n_nationkey as supp_nation, n2.n_nationkey as cust_nation, l.l_extendedprice * l.l_discount from supplier as s inner join lineitem as l on s.s_suppkey = l.l_suppkey inner join orders as o on o.o_orderkey = l.l_orderkey inner join customer as c on c.c_custkey = o.o_custkey inner join nation as n1 on s.s_nationkey = n1.n_nationkey inner join nation as n2 on c.c_nationkey = n2.n_nationkey where n1.n_nationkey = 1 and n2.n_nationkey = 2 and o.o_orderkey < 10000",
            ),
            (
                'TEST_09', ordered, False,
                "select c_custkey, c_nationkey as nkey from customer where c_custkey < 0 and c_nationkey >=30",
            ),
            (
                'TEST_10', ordered, False,
                "select sin(c_acctbal), cos(c_acctbal), asin(c_acctbal), acos(c_acctbal), ln(c_acctbal), tan(c_acctbal), atan(c_acctbal), floor(c_acctbal), c_acctbal from customer",
            ),
            (
                'TEST_11', ordered, False,
                "select n1.n_nationkey as n1key, n2.n_nationkey as n2key, n1.n_nationkey + n2.n_nationkey from nation as n1 full outer join nation as n2 on n1.n_nationkey = n2.n_nationkey + 6 where n1.n_nationkey < 10 and n1.n_nationkey > 5",
            ),
            (
                'TEST_12', ordered, False,
                "select count(n1.n_nationkey) as n1key, count(n2.n_nationkey) as n2key, count(*) as cstar from nation as n1 full outer join nation as n2 on n1.n_nationkey = n2.n_nationkey + 6",
            ),
            (
                'TEST_13', ordered, False,
                "select o_orderkey, o_custkey from orders where o_orderkey < 10 and o_orderkey >= 1",
            ),
            (
                # TODO: Change sum/count for avg KC
                'TEST_14', ordered, True,
                "select 100168549 - sum(o_orderkey)/count(o_orderkey), 56410984/sum(o_totalprice), (123 - 945/max(o_orderkey))/(sum(81619/o_orderkey)/count(81619/o_orderkey)) from orders where o_orderkey < 50",
            ),
            (
                'TEST_15', ordered, False,
                "select EXTRACT(YEAR FROM l_receiptdate) - EXTRACT(YEAR FROM l_shipdate) as years_late, EXTRACT(MONTH FROM l_receiptdate) - EXTRACT(MONTH FROM l_shipdate) as months_late, EXTRACT(DAY FROM l_receiptdate) - EXTRACT(DAY FROM l_shipdate) as days_late from lineitem where l_shipdate < DATE '1993-01-01'",
            ),
        )

        banner = '=============================='

        for schema_type in data_types:
            if skip_test(dask_client, nRals, schema_type, queryType):
                continue
            cs.create_tables(
                bc, hdfs_dir_data_lc, schema_type, tables=tables
            )

            # Run Query ------------------------------------------------------
            print(banner)
            print(queryType)
            print(banner)

            for query_id, order_flag, as_percentage, sql in test_cases:
                runTest.run_query(
                    bc,
                    drill,
                    sql,
                    query_id,
                    queryType,
                    order_flag,
                    '',
                    tolerance,
                    as_percentage,
                    schema_type,
                )
示例#29
0
    def executionTest():
        tables = ["orders", "nation", "lineitem"]
        data_types = [
            DataType.DASK_CUDF,
            DataType.CUDF,
            DataType.CSV,
            DataType.ORC,
            DataType.PARQUET,
        ]  # TODO json

        # Create Tables -----------------------------------------------------
        for fileSchemaType in data_types:
            if skip_test(dask_client, nRals, fileSchemaType, queryType):
                continue
            cs.create_tables(bc, dir_data_file, fileSchemaType, tables=tables)

            # Run Query ------------------------------------------------------
            worder = 1
            use_percentage = False
            acceptable_difference = 0.01

            print("==============================")
            print(queryType)

            print("==============================")

            # ------------------ UNION ALL ---------------------

            queryId = "TEST_01"
            query = """(select o_orderkey, o_custkey from orders
                        where o_orderkey < 100
                    )
                    union all
                    (
                        select o_orderkey, o_custkey from orders
                        where o_orderkey < 300
                        and o_orderkey >= 200
                    )"""
            runTest.run_query(
                bc,
                drill,
                query,
                queryId,
                queryType,
                worder,
                "o_orderkey",
                acceptable_difference,
                use_percentage,
                fileSchemaType,
            )

            queryId = "TEST_02"
            query = """(select o_orderkey, o_custkey from orders
                        where o_orderkey < 100
                    )
                    union all
                    (
                        select o_orderkey, o_custkey from orders
                        where o_orderkey < 300
                        and o_orderkey >= 200
                    )
                    order by 2"""
            runTest.run_query(
                bc,
                drill,
                query,
                queryId,
                queryType,
                worder,
                "o_orderkey",
                acceptable_difference,
                use_percentage,
                fileSchemaType,
            )

            queryId = "TEST_03"
            query = """(select o_orderkey, o_totalprice as key
                        from orders where o_orderkey < 100
                    )
                    union all
                    (
                        select o_orderkey, o_custkey as key from orders
                        where o_orderkey < 300 and o_orderkey >= 200
                    )"""
            runTest.run_query(
                bc,
                drill,
                query,
                queryId,
                queryType,
                worder,
                "o_orderkey",
                acceptable_difference,
                use_percentage,
                fileSchemaType,
            )

            queryId = "TEST_04"
            query = """(select o_orderkey, null as keyy, o_totalprice,
                        cast(null as int) as o_totalprice2, null as field5,
                        null as field6 from orders
                        where o_orderkey < 100
                    )
                    union all
                    (
                        select o_orderkey + 100.1 as o_orderkey,
                        o_custkey as keyy, null as o_totalprice,
                        o_totalprice as o_totalprice2, null as field5,
                        cast(null as double) as field6
                        from orders where o_orderkey < 300
                        and o_orderkey >= 200
                    )"""
            query_spark = """(select
                            o_orderkey,
                            cast(null as int) as keyy,
                            o_totalprice,
                            cast(null as double) as o_totalprice2,
                            cast(null as int) as field5,
                            cast(null as double) as field6
                            from orders where o_orderkey < 100
                        )
                        union all
                        (
                            select
                            o_orderkey + 100.1 as o_orderkey,
                            o_custkey as keyy,
                            cast(null as double) as o_totalprice,
                            o_totalprice as o_totalprice2,
                            cast(null as int) as field5,
                            cast(null as double) as field6
                            from orders where o_orderkey < 300
                            and o_orderkey >= 200
                        )"""
            runTest.run_query(
                bc,
                spark,
                query,
                queryId,
                queryType,
                worder,
                "",
                acceptable_difference,
                use_percentage,
                fileSchemaType,
                query_spark=query_spark,
            )

            queryId = "TEST_05"
            query = """(select o_orderkey, 100.1, o_totalprice,
                        cast(100 as float), 100, 1.1
                        from orders where o_orderkey < 100
                    )
                    union all
                    (
                        select o_orderkey + 100.1 as o_orderkey,
                        o_custkey as keyy, 10000, o_totalprice, 101.1,100
                        from orders where o_orderkey < 300
                        and o_orderkey >= 200
                    )"""
            runTest.run_query(
                bc,
                drill,
                query,
                queryId,
                queryType,
                worder,
                "o_orderkey",
                acceptable_difference,
                use_percentage,
                fileSchemaType,
            )

            queryId = "TEST_06"
            query = """(select o_orderkey, o_orderstatus, o_orderstatus
                        from orders where o_orderkey < 100
                    )
                    union all
                    (
                        select o_orderkey + 100.1 as o_orderkey,
                        SUBSTRING(o_orderstatus, 2, 4), 'hello work'
                        from orders where o_orderkey < 300
                        and o_orderkey >= 200
                    )"""
            runTest.run_query(
                bc,
                drill,
                query,
                queryId,
                queryType,
                worder,
                "o_orderkey",
                acceptable_difference,
                use_percentage,
                fileSchemaType,
            )

            queryId = "TEST_07"
            query = """(select o_orderkey, o_custkey from orders
                        where o_orderkey < 100
                    )
                    union all
                    (select o_orderkey, o_custkey from orders
                        where o_orderkey < 300
                        and o_orderkey >= 200) order by 2"""
            runTest.run_query(
                bc,
                drill,
                query,
                queryId,
                queryType,
                worder,
                "o_orderkey",
                acceptable_difference,
                use_percentage,
                fileSchemaType,
            )

            # ------------------ UNION ---------------------

            queryId = "TEST_08"
            query = """(select o_orderkey, o_custkey from orders
                        where o_orderkey < 100
                    )
                    union
                    (
                        select o_orderkey, o_custkey from orders
                        where o_orderkey < 200 and o_orderkey >= 10
                    )"""
            runTest.run_query(
                bc,
                drill,
                query,
                queryId,
                queryType,
                worder,
                "o_orderkey",
                acceptable_difference,
                use_percentage,
                fileSchemaType,
            )

            queryId = "TEST_09"
            query = """(select o_orderkey, o_custkey from orders
                        where o_orderkey < 60
                    )
                    union
                    (
                        select o_orderkey, o_custkey from orders
                        where o_orderkey < 200 and o_orderkey >= 10
                    )
                    order by 2"""
            runTest.run_query(
                bc,
                drill,
                query,
                queryId,
                queryType,
                worder,
                "o_orderkey",
                acceptable_difference,
                use_percentage,
                fileSchemaType,
            )

            queryId = "TEST_10"
            query = """(select o_orderkey, o_orderstatus, o_orderstatus
                        from orders where o_orderkey < 100
                    )
                    union
                    (
                        select o_orderkey + 100.1 as o_orderkey,
                        SUBSTRING(o_orderstatus, 2, 4), 'hello work'
                        from orders where o_orderkey < 300
                        and o_orderkey >= 5
                    )"""
            runTest.run_query(
                bc,
                drill,
                query,
                queryId,
                queryType,
                worder,
                "o_orderkey",
                acceptable_difference,
                use_percentage,
                fileSchemaType,
            )

            queryId = "TEST_11"
            query = """(select nat1.n_nationkey, nat1.n_name from nation as nat1
                        inner join lineitem on nat1.n_nationkey = mod(l_suppkey, 1010)
                        where nat1.n_name like 'INDIA'
                    ) union
			        ( select nat2.n_nationkey, nat2.n_name from nation as nat2
                        inner join orders on nat2.n_nationkey = mod(o_orderkey, 1010)
                        where nat2.n_name like 'INDIA'
                    )"""
            runTest.run_query(
                bc,
                spark,
                query,
                queryId,
                queryType,
                worder,
                "",
                acceptable_difference,
                use_percentage,
                fileSchemaType,
            )

            queryId = "TEST_12"
            query = """select l_returnflag, l_shipdate, l_linestatus
                        from lineitem
                        where l_orderkey < 100 and l_linenumber < 2
                    union all
                        select l_returnflag, l_shipdate, l_linestatus
                        from lineitem where l_partkey < 1
                        and l_orderkey < 2 and l_linenumber < 2"""
            if fileSchemaType == DataType.ORC:
                runTest.run_query(
                    bc,
                    spark,
                    query,
                    queryId,
                    queryType,
                    worder,
                    "",
                    acceptable_difference,
                    use_percentage,
                    fileSchemaType,
                )
            else:
                runTest.run_query(
                    bc,
                    drill,
                    query,
                    queryId,
                    queryType,
                    worder,
                    "",
                    acceptable_difference,
                    use_percentage,
                    fileSchemaType,
                )

            queryId = "TEST_13"
            query = """select o_orderpriority as l_returnflag,
                        o_orderdate as l_shipdate, o_orderstatus as l_linestatus
                    from orders where o_orderkey < 100
                    union all
                    select l_returnflag, l_shipdate, l_linestatus
                    from lineitem where l_orderkey = 3"""
            if fileSchemaType == DataType.ORC:
                runTest.run_query(
                    bc,
                    spark,
                    query,
                    queryId,
                    queryType,
                    worder,
                    "",
                    acceptable_difference,
                    use_percentage,
                    fileSchemaType,
                )
            else:
                runTest.run_query(
                    bc,
                    drill,
                    query,
                    queryId,
                    queryType,
                    worder,
                    "",
                    acceptable_difference,
                    use_percentage,
                    fileSchemaType,
                )

            queryId = "TEST_14"
            query = """select o_orderdate as d1, o_orderpriority as s1,
                        o_orderstatus as s2, o_orderkey as l1
                    from orders where o_orderkey < 100
                    union all
                    select o_orderdate as d1, o_orderpriority as s1,
                        o_orderstatus as s2, o_orderkey as l1
                    from orders where o_custkey < 100
                    union all
                    select o_orderdate as d1, o_orderpriority as s1,
                        o_orderstatus as s2, o_orderkey as l1
                    from orders where o_orderstatus = 'O'
                    union all
                    select o_orderdate as d1, o_orderpriority as s1,
                        o_orderstatus as s2, o_orderkey as l1
                    from orders where o_totalprice < 350"""
            if fileSchemaType == DataType.ORC:
                runTest.run_query(
                    bc,
                    spark,
                    query,
                    queryId,
                    queryType,
                    worder,
                    "",
                    acceptable_difference,
                    use_percentage,
                    fileSchemaType,
                )
            else:
                runTest.run_query(
                    bc,
                    drill,
                    query,
                    queryId,
                    queryType,
                    worder,
                    "",
                    acceptable_difference,
                    use_percentage,
                    fileSchemaType,
                )

            if Settings.execution_mode == ExecutionMode.GENERATOR:
                print("==============================")
                break
示例#30
0
    def executionTest():
        """Run the aggregate / COUNT(DISTINCT ...) query cases per data type.

        For every entry of ``data_types`` that ``skip_test`` does not reject,
        the tables are created through ``cs.create_tables`` and each case of
        ``distinct_cases`` is handed to ``runTest.run_query`` with ``drill``
        as the engine, in declaration order. When ``Settings.execution_mode``
        is ``ExecutionMode.GENERATOR`` the loop stops after the first data
        type that was not skipped.
        """
        tables = [
            "partsupp",
            "lineitem",
            "part",
            "supplier",
            "orders",
            "customer",
            "region",
            "nation",
        ]
        data_types = [
            DataType.DASK_CUDF,
            DataType.CUDF,
            DataType.CSV,
            DataType.ORC,
            DataType.PARQUET,
        ]  # TODO json

        # Settings shared by every case in this suite.
        worder = 1
        use_percentage = False
        acceptable_difference = 0.1
        # TEST_05 and TEST_06 are checked with this smaller tolerance instead.
        tight_difference = 0.01

        # Each case: (query id, SQL, order-by column, acceptable difference).
        distinct_cases = [
            (
                "TEST_01",
                """select count(distinct (n_regionkey + n_nationkey)),
                    n_regionkey from nation group by n_regionkey""",
                "n_regionkey",
                acceptable_difference,
            ),
            (
                "TEST_02",
                """select count(distinct o_custkey), o_orderkey
                    from orders where o_orderkey < 100
                    group by o_orderkey, (o_orderkey + o_custkey)""",
                "o_orderkey",
                acceptable_difference,
            ),
            (
                "TEST_03",
                """select count(distinct(o_orderkey + o_custkey))
                    as new_col, sum(o_orderkey), o_custkey
                    from orders group by o_custkey""",
                "o_custkey",
                acceptable_difference,
            ),
            (
                "TEST_04",
                """select count(distinct(o_custkey)), avg(o_totalprice),
                (o_orderkey + o_custkey) as num from orders
                where o_custkey < 100 group by o_custkey, o_orderkey""",
                "",
                acceptable_difference,
            ),
            (
                "TEST_05",
                """select count(distinct(o_custkey)), max(o_totalprice),
                    min(o_totalprice), avg(o_totalprice)
                    from orders group by o_custkey""",
                "",
                tight_difference,
            ),
            (
                "TEST_06",
                """select n_nationkey, count(distinct(
                    n_regionkey + n_nationkey))/count(n_nationkey)
                    from nation group by n_nationkey""",
                "",
                tight_difference,
            ),
            (
                "TEST_07",
                """select count(distinct(o_orderdate)), count(distinct(o_custkey)),
                    count(distinct(o_totalprice)), sum(o_orderkey)
                    from orders group by o_custkey""",
                "",
                acceptable_difference,
            ),
            (
                "TEST_08",
                """select COUNT(DISTINCT(n.n_nationkey)),
                    AVG(r.r_regionkey) from nation as n left outer join region as r
                    on n.n_nationkey = r.r_regionkey""",
                "",
                acceptable_difference,
            ),
            (
                "TEST_09",
                """select MIN(n.n_nationkey), MAX(r.r_regionkey),
                    COUNT(DISTINCT(n.n_nationkey + r.r_regionkey))
                    from nation as n left outer join region as r
                    on n.n_nationkey = r.r_regionkey""",
                "",
                acceptable_difference,
            ),
            (
                "TEST_10",
                """select COUNT(DISTINCT(n1.n_nationkey)) as n1key,
                    COUNT(DISTINCT(n2.n_nationkey)) as n2key from nation as n1
                    full outer join nation as n2
                    on n1.n_nationkey = n2.n_regionkey""",
                "",
                acceptable_difference,
            ),
            (
                "TEST_11",
                """select r.r_regionkey, n.n_nationkey,
                    COUNT(n.n_nationkey), COUNT(DISTINCT(r.r_regionkey)),
                    SUM(DISTINCT(n.n_nationkey + r.r_regionkey)) from nation as n
                    left outer join region as r on n.n_nationkey = r.r_regionkey
                    GROUP BY r.r_regionkey, n.n_nationkey""",
                "",
                acceptable_difference,
            ),
            (
                "TEST_12",
                """select n1.n_regionkey, n2.n_nationkey,
                    MIN(n1.n_regionkey), MAX(n1.n_regionkey),
                    AVG(n2.n_nationkey)
                    from nation as n1 full outer join nation as n2
                    on n1.n_nationkey = n2.n_nationkey + 6
                    GROUP BY n1.n_regionkey, n2.n_nationkey""",
                "",
                acceptable_difference,
            ),
            (
                "TEST_13",
                """select COUNT(DISTINCT(n.n_nationkey)),
                    AVG(r.r_regionkey) from nation as n right outer join region as r
                    on n.n_nationkey = r.r_regionkey""",
                "",
                acceptable_difference,
            ),
            (
                "TEST_14",
                """select r.r_regionkey, n.n_nationkey,
                    COUNT(n.n_nationkey), COUNT(DISTINCT(r.r_regionkey)),
                    SUM(DISTINCT(n.n_nationkey + r.r_regionkey)) from nation as n
                    right outer join region as r on n.n_nationkey = r.r_regionkey
                    GROUP BY r.r_regionkey, n.n_nationkey""",
                "",
                acceptable_difference,
            ),
        ]

        for fileSchemaType in data_types:
            if skip_test(dask_client, nRals, fileSchemaType, queryType):
                continue

            # Create Tables -------------------------------------------------
            cs.create_tables(bc, dir_data_file, fileSchemaType, tables=tables)

            # Run Queries ---------------------------------------------------
            print("==============================")
            print(queryType)
            print("==============================")

            for test_id, sql, order_col, tolerance in distinct_cases:
                runTest.run_query(
                    bc,
                    drill,
                    sql,
                    test_id,
                    queryType,
                    worder,
                    order_col,
                    tolerance,
                    use_percentage,
                    fileSchemaType,
                )

            # In generator mode a single processed data type is enough.
            if Settings.execution_mode == ExecutionMode.GENERATOR:
                print("==============================")
                break