Exemplo n.º 1
0
def samples(bc, dask_client, nRals, **kwargs):
    """Yield one TPC-H test sample per query for every datasource.

    Args:
        bc: Context forwarded to ``createSchema.create_tables`` when tables
            are initialised.
        dask_client: Forwarded to ``datasources``.
        nRals: Forwarded to ``datasources``.
        **kwargs: Optional settings:
            init_tables (bool): create the tables of each datasource before
                yielding its samples. Defaults to ``False``.
            sql_table_filter_map (dict): forwarded to ``create_tables``.
                Defaults to ``{}``.
            sql_table_batch_size_map (dict): forwarded to ``create_tables``.
                Defaults to ``{}``.
            sql_connection: forwarded to ``create_tables``. Defaults to
                ``None``.

    Yields:
        tuple: ``(sampleId, query, queryId, fileSchemaType)`` where
        ``queryId`` is ``"TEST_NN"`` (1-based position of the query in
        ``tpch_queries``, zero-padded to two digits) and ``sampleId`` is
        ``"<fileSchemaType>.<queryId>"``.
    """
    init_tables = kwargs.get("init_tables", False)
    sql_table_filter_map = kwargs.get("sql_table_filter_map", {})
    sql_table_batch_size_map = kwargs.get("sql_table_batch_size_map", {})
    sql = kwargs.get("sql_connection", None)

    for fileSchemaType in datasources(dask_client, nRals):
        dstables = datasource_tables[fileSchemaType]

        if init_tables:
            print("Creating tables for", str(fileSchemaType))
            table_names = list(dstables.values())
            createSchema.create_tables(
                bc,
                "",
                fileSchemaType,
                tables=tables,
                table_names=table_names,
                sql_table_filter_map=sql_table_filter_map,
                sql_table_batch_size_map=sql_table_batch_size_map,
                sql_connection=sql,
            )
            print("All tables were created for", str(fileSchemaType))

        # Resolve every query up front so a bad query definition fails
        # before the first sample of this datasource is yielded.
        queries = [get_tpch_query(q, dstables) for q in tpch_queries]
        for i, query in enumerate(queries, start=1):
            # Two-digit, zero-padded index: 1 -> "01", 10 -> "10".
            # (An `i > 10` threshold would label the tenth query "010".)
            queryId = "TEST_" + str(i).zfill(2)
            sampleId = str(fileSchemaType) + "." + queryId
            yield sampleId, query, queryId, fileSchemaType
Exemplo n.º 2
0
def samples(bc, dask_client, nRals, **kwargs):
    """Yield one TPC-H test sample per query for every datasource.

    Args:
        bc: Context forwarded to ``cs.create_tables`` when tables are
            initialised.
        dask_client: Forwarded to ``datasources``.
        nRals: Forwarded to ``datasources``.
        **kwargs: Optional settings:
            init_tables (bool): create the tables of each datasource before
                yielding its samples. Defaults to ``False``.
            dir_data_lc (str): data directory forwarded to
                ``cs.create_tables``. Defaults to ``""``.

    Yields:
        tuple: ``(sampleId, query, queryId, fileSchemaType)`` where
        ``queryId`` is ``"TEST_NN"`` (1-based position of the query in
        ``tpch_queries``, zero-padded to two digits) and ``sampleId`` is
        ``"<fileSchemaType>.<queryId>"``.
    """
    init_tables = kwargs.get("init_tables", False)
    dir_data_lc = kwargs.get("dir_data_lc", "")

    for fileSchemaType in datasources(dask_client, nRals):
        dstables = datasource_tables[fileSchemaType]

        if init_tables:
            print("Creating tables for", str(fileSchemaType))
            cs.create_tables(
                bc,
                dir_data_lc,
                fileSchemaType,
                tables=tables,
                table_names=list(dstables.values()),
            )
            print("All tables were created for", str(fileSchemaType))

        # Resolve every query up front so a bad query definition fails
        # before the first sample of this datasource is yielded.
        queries = [get_tpch_query(q, dstables) for q in tpch_queries]
        for i, query in enumerate(queries, start=1):
            # Two-digit, zero-padded index: 1 -> "01", 10 -> "10".
            # (An `i > 10` threshold would label the tenth query "010".)
            queryId = "TEST_" + str(i).zfill(2)
            sampleId = str(fileSchemaType) + "." + queryId
            yield sampleId, query, queryId, fileSchemaType
Exemplo n.º 3
0
    def executionTest(queryType, setInd, config_options):
        """Run the TPC-H query plan once per supported input data type.

        A context is created from ``config_options``. For every data type
        that ``skip_test`` does not exclude, the TPC-H tables are created
        from ``dir_data_file`` and each query of the plan is fetched with
        ``tpch.get_tpch_query`` and validated through ``runTest.run_query``
        against its reference engine.

        The Dask client (when there is one) is garbage-collected, closed and
        shut down in a ``finally`` block, so it is released even when a
        query raises.

        Args:
            queryType: Name of the test group; printed in the banner and
                forwarded to ``skip_test`` and ``runTest.run_query``.
            setInd: Zero-based index of the option set; only used in the
                banner, where it is shown as ``setInd + 1``.
            config_options: Options handed to ``init_context`` and echoed
                in the banner.
        """
        bc, dask_client = init_context(config_options)

        try:
            tables = [
                "nation",
                "region",
                "customer",
                "lineitem",
                "orders",
                "supplier",
                "part",
                "partsupp",
            ]

            data_types = [
                DataType.DASK_CUDF,
                DataType.CUDF,
                DataType.CSV,
                DataType.PARQUET,
            ]  # TODO orc, json

            # Tells run_query whether the result sets have to be ordered
            # before they are compared.
            worder = 1
            use_percentage = False
            acceptable_difference = 0.001

            # Comparison settings: (engine name, acceptable difference,
            # use_percentage). The engine is referenced by name so that
            # `drill` / `spark` are only looked up when actually needed.
            drill_check = ("drill", acceptable_difference, use_percentage)
            drill_pct = ("drill", acceptable_difference, True)
            drill_loose = ("drill", 0.1, use_percentage)
            spark_check = ("spark", acceptable_difference, use_percentage)
            spark_pct = ("spark", acceptable_difference, True)

            # One row per query, in execution order:
            #   (queryId, check, orc_check)
            # check     -- settings for every data type; None means the
            #              query is currently disabled (its text is still
            #              fetched, but it is not executed).
            # orc_check -- settings used instead of `check` when the data
            #              type is DataType.ORC; None means "no ORC special
            #              case". ORC is not in `data_types` yet, so these
            #              only take effect once it is added.
            query_plan = [
                ("TEST_01", drill_check, None),
                ("TEST_02", drill_check, None),
                ("TEST_03", drill_loose, None),
                ("TEST_04", drill_pct, spark_check),
                ("TEST_05", drill_check, spark_check),
                ("TEST_06", spark_pct, None),
                ("TEST_07", drill_check, spark_check),
                ("TEST_08", drill_check, spark_check),
                ("TEST_09", drill_pct, spark_pct),
                ("TEST_10", drill_check, spark_check),
                ("TEST_11", None, None),
                ("TEST_12", drill_check, spark_check),
                ("TEST_13", drill_check, None),
                ("TEST_14", drill_check, spark_check),
                ("TEST_15", spark_check, None),
                ("TEST_16", drill_check, None),
                ("TEST_17", spark_check, None),
                ("TEST_18", drill_check, spark_check),
                ("TEST_19", drill_pct, None),
                ("TEST_20", spark_pct, None),
                ("TEST_21", spark_check, None),
                ("TEST_22", None, None),
            ]

            # Create Tables --------------------------------------------------
            for fileSchemaType in data_types:
                if skip_test(dask_client, nRals, fileSchemaType, queryType):
                    continue
                cs.create_tables(
                    bc, dir_data_file, fileSchemaType, tables=tables
                )

                # Run Query --------------------------------------------------
                print("==============================")
                print(queryType)
                print("Test set: " + str(setInd + 1) + " Options: " +
                      str(config_options))
                print("==============================")

                for queryId, check, orc_check in query_plan:
                    query = tpch.get_tpch_query(queryId)

                    if check is None:
                        # Disabled query: fetched only.
                        continue

                    if (orc_check is not None
                            and fileSchemaType == DataType.ORC):
                        check = orc_check

                    engine_name, difference, percentage = check
                    engine = spark if engine_name == "spark" else drill

                    runTest.run_query(
                        bc,
                        engine,
                        query,
                        queryId,
                        queryType,
                        worder,
                        "",
                        difference,
                        percentage,
                        fileSchemaType,
                    )
        finally:
            # Release the cluster resources whether or not the run succeeded.
            if dask_client is not None:
                dask_client.run(gc.collect)
                dask_client.run_on_scheduler(gc.collect)

                dask_client.close()
                dask_client.shutdown()
                del dask_client
            del bc
Exemplo n.º 4
0
    def executionTest(queryType):
        """Execute the TPC-H query plan for each supported input data type.

        For every data type that ``skip_test`` lets through, the TPC-H
        tables are created from ``dir_data_file``; each planned query is
        then fetched with ``tpch.get_tpch_query`` and, unless it is marked
        as disabled, validated through ``runTest.run_query`` against its
        reference engine.

        Args:
            queryType: Name of the test group; printed in the banner and
                forwarded to ``skip_test`` and ``runTest.run_query``.
        """
        tables = [
            "nation",
            "region",
            "customer",
            "lineitem",
            "orders",
            "supplier",
            "part",
            "partsupp",
        ]
        data_types = [
            DataType.DASK_CUDF,
            DataType.CUDF,
            DataType.CSV,
            DataType.PARQUET,
        ]  # TODO orc, json

        # Tells run_query whether the result sets have to be ordered before
        # they are compared.
        worder = 1
        use_percentage = False
        acceptable_difference = 0.001

        # Comparison settings: (engine name, acceptable difference,
        # use_percentage). Engines are referenced by name so that `drill` /
        # `spark` are only looked up at the moment a query is executed.
        via_drill = ("drill", acceptable_difference, use_percentage)
        via_drill_pct = ("drill", acceptable_difference, True)
        via_drill_loose = ("drill", 0.1, use_percentage)
        via_spark = ("spark", acceptable_difference, use_percentage)
        via_spark_pct = ("spark", acceptable_difference, True)

        # One row per query, in execution order:
        #   (queryId, settings, orc_settings)
        # settings     -- used for every data type; None marks a query whose
        #                 text is fetched but which is not executed.
        # orc_settings -- replaces `settings` when the data type is
        #                 DataType.ORC; None means there is no ORC special
        #                 case for that query.
        # TEST_20 and TEST_21 are intentionally absent (disabled entirely).
        plan = [
            ("TEST_01", via_drill, None),
            ("TEST_02", via_drill, None),
            ("TEST_03", via_drill_loose, None),
            ("TEST_04", via_drill_pct, via_spark),
            ("TEST_05", via_drill, via_spark),
            ("TEST_06", via_spark_pct, None),
            ("TEST_07", via_drill, via_spark),
            ("TEST_08", via_drill, via_spark),
            ("TEST_09", via_drill_pct, via_spark_pct),
            ("TEST_10", via_drill, via_spark),
            ("TEST_11", None, None),
            ("TEST_12", via_drill, via_spark),
            ("TEST_13", via_drill, None),
            ("TEST_14", via_drill, via_spark),
            ("TEST_15", via_spark, None),
            ("TEST_16", via_drill, None),
            ("TEST_17", via_spark, None),
            ("TEST_18", via_drill, via_spark),
            ("TEST_19", via_drill_pct, None),
            ("TEST_22", None, None),
        ]

        # Create Tables ------------------------------------------------------
        for fileSchemaType in data_types:
            if skip_test(dask_client, nRals, fileSchemaType, queryType):
                continue
            cs.create_tables(bc, dir_data_file, fileSchemaType, tables=tables)

            # Run Query ------------------------------------------------------
            print("==============================")
            print(queryType)
            print("==============================")

            for queryId, settings, orc_settings in plan:
                query = tpch.get_tpch_query(queryId)

                if settings is None:
                    # Disabled query: fetched only.
                    continue

                if orc_settings is not None and fileSchemaType == DataType.ORC:
                    settings = orc_settings

                engine_name, difference, percentage = settings
                engine = spark if engine_name == "spark" else drill

                runTest.run_query(
                    bc,
                    engine,
                    query,
                    queryId,
                    queryType,
                    worder,
                    "",
                    difference,
                    percentage,
                    fileSchemaType,
                )
Exemplo n.º 5
0
    def executionGSTest(queryType):
        """Run TPC-H queries TEST_15..TEST_21 on tables loaded from GCS.

        Registers the ``tpch_gs`` Google Cloud Storage filesystem on ``bc``,
        creates the TPC-H tables from that location for every file type in
        ``data_types`` and stores the value returned by ``run_query`` for
        each executed query in ``log_dict`` under its query id.

        Args:
            queryType: Label printed as a banner and forwarded unchanged to
                ``run_query``.
        """
        authority = "tpch_gs"

        bc.gs(authority,
              project_id=gs_project_id,
              bucket_name=gs_bucket_name,
              use_default_adc_json_file=True,
              adc_json_file='')

        dir_data_lc = 'gcs://' + authority + '/100MB2Part/'

        tables = [
            'nation', 'region', 'supplier', 'customer', 'lineitem', 'orders',
            'part', 'partsupp'
        ]
        data_types = [DataType.PARQUET]

        # (query id, object handed to run_query as its second argument).
        # A single table keeps every executed query on the same code path, so
        # no result can be silently left out of log_dict.
        query_plan = [
            ('TEST_15', spark),
            ('TEST_16', drill),
            ('TEST_17', spark),
            ('TEST_18', drill),
            ('TEST_19', drill),
            ('TEST_20', spark),
            ('TEST_21', spark),
        ]

        for fileSchemaType in data_types:
            # Load the tables through the GCS filesystem registered above;
            # passing the local data_dir here would leave it untested.
            create_tables(bc, dir_data_lc, fileSchemaType, tables=tables)

            #   Run Query -----------------------------------------------------------------------------
            worder = 1  # Whether the result sets must be ordered before they are compared
            use_percentage = False
            acceptable_difference = 0.01

            print('==============================')
            print(queryType)
            print('==============================')

            for queryId, engine in query_plan:
                print("Executing " + queryId + " ... ")
                query = tpch.get_tpch_query(queryId)
                result = run_query(bc, engine, query, queryId, queryType,
                                   worder, '', acceptable_difference,
                                   use_percentage, fileSchemaType)

                log_dict[queryId] = result

            # TEST_22 is only looked up here; it is not executed or logged.
            queryId = 'TEST_22'
            query = tpch.get_tpch_query(queryId)
Exemplo n.º 6
0
    def executionS3Test(queryType):
        """Run TPC-H queries TEST_08..TEST_14 on tables loaded from S3.

        Registers the ``tpch_s3`` S3 filesystem on ``bc``, creates the TPC-H
        tables from that location for every file type in ``data_types`` and
        stores the value returned by ``run_query`` for each executed query
        in ``log_dict`` under its query id. TEST_11 is looked up but not
        executed.

        Args:
            queryType: Label printed as a banner and forwarded unchanged to
                ``run_query``.
        """
        #Read Data TPCH------------------------------------------------------------------------------------------------------------

        authority = "tpch_s3"

        # Connection diagnostics. The access key id and secret key are
        # deliberately NOT printed: test output ends up in logs.
        print(authority)
        print(bucket_name)
        print(S3EncryptionType.NONE)

        bc.s3(authority,
              bucket_name=bucket_name,
              encryption_type=S3EncryptionType.NONE,
              access_key_id=access_key_id,
              secret_key=secret_key)

        dir_data_lc = "s3://" + authority + "/" + "DataSet100Mb2part/"

        tables = [
            'nation', 'region', 'supplier', 'customer', 'lineitem', 'orders',
            'part', 'partsupp'
        ]
        data_types = [DataType.PARQUET]  # TODO json

        query_ids = [
            'TEST_08', 'TEST_09', 'TEST_10', 'TEST_11', 'TEST_12', 'TEST_13',
            'TEST_14'
        ]
        # Queries whose text is fetched but which are not executed.
        fetch_only = {'TEST_11'}

        for fileSchemaType in data_types:
            # Load the tables through the S3 filesystem registered above;
            # passing the local data_dir here would leave it untested.
            create_tables(bc, dir_data_lc, fileSchemaType, tables=tables)

            #   Run Query -----------------------------------------------------------------------------
            worder = 1  # Whether the result sets must be ordered before they are compared
            use_percentage = False
            acceptable_difference = 0.01

            print('==============================')
            print(queryType)
            print('==============================')

            for queryId in query_ids:
                if queryId in fetch_only:
                    query = tpch.get_tpch_query(queryId)
                    continue

                print("Executing " + queryId + " ... ")
                query = tpch.get_tpch_query(queryId)
                result = run_query(bc, drill, query, queryId, queryType,
                                   worder, '', acceptable_difference,
                                   use_percentage, fileSchemaType)

                # Every executed query is logged through this one statement.
                log_dict[queryId] = result
Exemplo n.º 7
0
    def executionLocalTest(queryType):

        #Read Data TPCH------------------------------------------------------------------------------------------------------------

        tables = [
            'nation', 'region', 'supplier', 'customer', 'lineitem', 'orders',
            'part', 'partsupp'
        ]

        data_types = [DataType.PARQUET]  # TODO json

        for fileSchemaType in data_types:
            create_tables(bc, data_dir, fileSchemaType, tables=tables)

            #   Run Query -----------------------------------------------------------------------------
            worder = 1  # Parameter to indicate if its necessary to order the resulsets before compare them
            use_percentage = False
            acceptable_difference = 0.01

            print('==============================')
            print(queryType)
            print('==============================')

            queryId = 'TEST_01'
            print("Executing " + queryId + " ... ")
            query = tpch.get_tpch_query(queryId)
            result = run_query(bc, drill, query, queryId, queryType, worder,
                               '', acceptable_difference, use_percentage,
                               fileSchemaType)

            log_dict[queryId] = result

            queryId = 'TEST_02'
            print("Executing " + queryId + " ... ")
            query = tpch.get_tpch_query(queryId)
            result = run_query(bc, drill, query, queryId, queryType, worder,
                               '', acceptable_difference, use_percentage,
                               fileSchemaType)

            log_dict[queryId] = result

            queryId = 'TEST_03'
            print("Executing " + queryId + " ... ")
            query = tpch.get_tpch_query(queryId)
            result = run_query(bc, spark, query, queryId, queryType, worder,
                               '', acceptable_difference, use_percentage,
                               fileSchemaType)

            log_dict[queryId] = result

            queryId = 'TEST_04'
            print("Executing " + queryId + " ... ")
            query = tpch.get_tpch_query(queryId)
            result = run_query(bc, drill, query, queryId, queryType, worder,
                               '', acceptable_difference, use_percentage,
                               fileSchemaType)

            queryId = 'TEST_05'
            print("Executing " + queryId + " ... ")
            query = tpch.get_tpch_query(queryId)
            result = run_query(bc, drill, query, queryId, queryType, worder,
                               '', acceptable_difference, use_percentage,
                               fileSchemaType)

            log_dict[queryId] = result

            queryId = 'TEST_06'
            print("Executing " + queryId + " ... ")
            query = tpch.get_tpch_query(queryId)
            result = run_query(bc, spark, query, queryId, queryType, worder,
                               '', acceptable_difference, use_percentage,
                               fileSchemaType)

            log_dict[queryId] = result

            queryId = 'TEST_07'
            print("Executing " + queryId + " ... ")
            query = tpch.get_tpch_query(queryId)
            result = run_query(bc, drill, query, queryId, queryType, worder,
                               '', acceptable_difference, use_percentage,
                               fileSchemaType)

            log_dict[queryId] = result