Example #1
def do_test_query(self, query):
    spark = get_spark_i_know_what_i_am_doing()
    jvm_session = _get_jvm_session(spark)
    jvm = _get_jvm(spark)
    tests = {
        "q1": jvm.com.nvidia.spark.rapids.tests.tpch.Q1Like,
        "q2": jvm.com.nvidia.spark.rapids.tests.tpch.Q2Like,
        "q3": jvm.com.nvidia.spark.rapids.tests.tpch.Q3Like,
        "q4": jvm.com.nvidia.spark.rapids.tests.tpch.Q4Like,
        "q5": jvm.com.nvidia.spark.rapids.tests.tpch.Q5Like,
        "q6": jvm.com.nvidia.spark.rapids.tests.tpch.Q6Like,
        "q7": jvm.com.nvidia.spark.rapids.tests.tpch.Q7Like,
        "q8": jvm.com.nvidia.spark.rapids.tests.tpch.Q8Like,
        "q9": jvm.com.nvidia.spark.rapids.tests.tpch.Q9Like,
        "q10": jvm.com.nvidia.spark.rapids.tests.tpch.Q10Like,
        "q11": jvm.com.nvidia.spark.rapids.tests.tpch.Q11Like,
        "q12": jvm.com.nvidia.spark.rapids.tests.tpch.Q12Like,
        "q13": jvm.com.nvidia.spark.rapids.tests.tpch.Q13Like,
        "q14": jvm.com.nvidia.spark.rapids.tests.tpch.Q14Like,
        "q15": jvm.com.nvidia.spark.rapids.tests.tpch.Q15Like,
        "q16": jvm.com.nvidia.spark.rapids.tests.tpch.Q16Like,
        "q17": jvm.com.nvidia.spark.rapids.tests.tpch.Q17Like,
        "q18": jvm.com.nvidia.spark.rapids.tests.tpch.Q18Like,
        "q19": jvm.com.nvidia.spark.rapids.tests.tpch.Q19Like,
        "q20": jvm.com.nvidia.spark.rapids.tests.tpch.Q20Like,
        "q21": jvm.com.nvidia.spark.rapids.tests.tpch.Q21Like,
        "q22": jvm.com.nvidia.spark.rapids.tests.tpch.Q22Like
    }
    # Index directly so an unknown query name raises a clear KeyError
    # rather than an AttributeError on None from dict.get().
    df = tests[query].apply(jvm_session)
    return DataFrame(df, spark.getActiveSession())
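
The `_get_jvm_session` and `_get_jvm` helpers used here (and again in Examples #2, #6, and #11) cross the Py4J boundary to reach the JVM-side session and classes. Their definitions are not part of this listing; a minimal sketch, assuming they simply wrap PySpark's private handles, could look like this:

def _get_jvm_session(spark):
    # JVM-side SparkSession backing the Python session (private Py4J handle).
    return spark._jsparkSession

def _get_jvm(spark):
    # Py4J JVM view, used to reach Scala/Java classes by fully qualified name.
    return spark.sparkContext._jvm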
Example #2
def do_test_query(self, query):
    spark = get_spark_i_know_what_i_am_doing()
    jvm_session = _get_jvm_session(spark)
    jvm = _get_jvm(spark)
    df = jvm.com.nvidia.spark.rapids.tests.tpcds.TpcdsLikeSpark.run(
        jvm_session, query)
    return DataFrame(df, spark.getActiveSession())
Example #3
def is_databricks_version_or_later(major, minor):
    spark = get_spark_i_know_what_i_am_doing()
    version = spark.conf.get("spark.databricks.clusterUsageTags.sparkVersion", "0.0")
    parts = version.split(".")
    if len(parts) < 2:
        raise RuntimeError("Unable to determine Databricks version from version string: " + version)
    # Compare (major, minor) as a tuple so that e.g. 12.0 counts as later than
    # 11.3; comparing the components independently would wrongly reject it.
    return (int(parts[0]), int(parts[1])) >= (major, minor)
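
The tuple comparison is what makes cross-major checks come out right. A standalone illustration with hypothetical version strings (no Spark session needed):

def _version_at_least(version, major, minor):
    # Same comparison as above, factored out purely for illustration.
    parts = version.split(".")
    return (int(parts[0]), int(parts[1])) >= (major, minor)

assert _version_at_least("12.0", 11, 3)      # newer major qualifies
assert _version_at_least("11.3", 11, 3)      # exact match qualifies
assert not _version_at_least("11.2", 11, 3)  # older minor does not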
Example #4
def spark_tmp_table_factory(request):
    base_id = 'tmp_table_{}'.format(random.randint(0, 1000000))
    yield TmpTableFactory(base_id)
    sp = get_spark_i_know_what_i_am_doing()
    tables = sp.sql("SHOW TABLES").collect()
    for row in tables:
        t_name = row['tableName']
        if t_name.startswith(base_id):
            sp.sql("DROP TABLE IF EXISTS {}".format(t_name))
Example #5
def spark_tmp_table_factory(request):
    worker_id = get_worker_id(request)
    table_id = random.getrandbits(31)
    base_id = f'tmp_table_{worker_id}_{table_id}'
    yield TmpTableFactory(base_id)
    sp = get_spark_i_know_what_i_am_doing()
    tables = sp.sql("SHOW TABLES").collect()
    for row in tables:
        t_name = row['tableName']
        if t_name.startswith(base_id):
            sp.sql("DROP TABLE IF EXISTS {}".format(t_name))
Example #6
def do_test_query(self, query):
    spark = get_spark_i_know_what_i_am_doing()
    jvm_session = _get_jvm_session(spark)
    jvm = _get_jvm(spark)
    tests = {
        "q5": jvm.com.nvidia.spark.rapids.tests.tpcxbb.Q5Like,
        "q16": jvm.com.nvidia.spark.rapids.tests.tpcxbb.Q16Like,
        "q21": jvm.com.nvidia.spark.rapids.tests.tpcxbb.Q21Like,
        "q22": jvm.com.nvidia.spark.rapids.tests.tpcxbb.Q22Like
    }
    # Same direct lookup as Example #1: fail fast on unknown query names.
    df = tests[query].apply(jvm_session)
    return DataFrame(df, spark.getActiveSession())
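
Either suite's `do_test_query` is called with one of the registered keys and returns an ordinary PySpark `DataFrame`. A hypothetical calling sketch (`suite` stands in for an instance of the enclosing class):

result = suite.do_test_query("q5")
result.show()  # from here on it behaves like any PySpark DataFrame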
Example #7
def spark_tmp_path(request):
    debug = request.config.getoption('debug_tmp_path')
    ret = request.config.getoption('tmp_path')
    if ret is None:
        ret = '/tmp/pyspark_tests/'
    ret = ret + '/' + str(random.randint(0, 1000000)) + '/'
    # Make sure it is there and accessible
    sc = get_spark_i_know_what_i_am_doing().sparkContext
    config = sc._jsc.hadoopConfiguration()
    path = sc._jvm.org.apache.hadoop.fs.Path(ret)
    fs = sc._jvm.org.apache.hadoop.fs.FileSystem.get(config)
    fs.mkdirs(path)
    yield ret
    if not debug:
        # Recursive delete via the two-argument form; the single-argument
        # FileSystem.delete(Path) is deprecated in Hadoop.
        fs.delete(path, True)
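
Being a pytest fixture, `spark_tmp_path` is consumed by naming it as a test parameter; each test invocation gets its own randomized directory. A hedged usage sketch (test name and data are illustrative):

def test_parquet_round_trip(spark_tmp_path):
    # spark_tmp_path ends with '/', so plain concatenation is enough.
    data_path = spark_tmp_path + 'PARQUET_DATA'
    spark = get_spark_i_know_what_i_am_doing()
    spark.range(100).write.parquet(data_path)
    assert spark.read.parquet(data_path).count() == 100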
Example #8
def spark_tmp_path(request):
    debug = request.config.getoption('debug_tmp_path')
    ret = request.config.getoption('tmp_path')
    if ret is None:
        ret = '/tmp/pyspark_tests/'
    worker_id = get_worker_id(request)
    pid = os.getpid()
    hostname = os.uname()[1]
    ret = f'{ret}/{hostname}-{worker_id}-{pid}-{random.randrange(0, 1<<31)}/'
    # Make sure it is there and accessible
    sc = get_spark_i_know_what_i_am_doing().sparkContext
    config = sc._jsc.hadoopConfiguration()
    path = sc._jvm.org.apache.hadoop.fs.Path(ret)
    fs = sc._jvm.org.apache.hadoop.fs.FileSystem.get(config)
    fs.mkdirs(path)
    yield ret
    if not debug:
        # Recursive delete via the two-argument form; the single-argument
        # FileSystem.delete(Path) is deprecated in Hadoop.
        fs.delete(path, True)
Example #9
from conftest import is_allowing_any_non_gpu, get_non_gpu_allowed, get_validate_execs_in_gpu_plan
from pyspark.sql import SparkSession, DataFrame
from spark_init_internal import get_spark_i_know_what_i_am_doing, spark_version


def _from_scala_map(scala_map):
    ret = {}
    # The value we get is a scala map, not a java map, so we need to jump through some hoops
    keys = scala_map.keys().iterator()
    while keys.hasNext():
        key = keys.next()
        ret[key] = scala_map.get(key).get()
    return ret


_spark = get_spark_i_know_what_i_am_doing()
# Have to reach into a private member to get access to the API we need
_orig_conf = _from_scala_map(_spark.conf._jconf.getAll())
_orig_conf_keys = _orig_conf.keys()


def is_tz_utc(spark=_spark):
    """
    Return True if the JVM's default time zone is UTC, False otherwise.
    """
    # Now we have to do some kind of ugly internal java stuff
    jvm = spark.sparkContext._jvm
    utc = jvm.java.time.ZoneId.of('UTC').normalized()
    sys_tz = jvm.java.time.ZoneId.systemDefault().normalized()
    return utc == sys_tz
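
`is_tz_utc` is most useful as a guard for tests whose expected results assume a UTC session time zone. A hypothetical sketch:

import pytest

def test_timestamp_casts():
    # Timestamp semantics asserted below would differ in a non-UTC JVM zone.
    if not is_tz_utc():
        pytest.skip("requires a UTC system time zone")
    # ... timestamp-sensitive assertions would go here ...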
Example #10
def __init__(self, tpcds_format, tpcds_path):
    self.tpcds_format = tpcds_format
    self.tpcds_path = tpcds_path
    self.setup(get_spark_i_know_what_i_am_doing())
Example #11
def spark_jvm():
    return _get_jvm(get_spark_i_know_what_i_am_doing())
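
`spark_jvm` exposes the Py4J JVM view directly, e.g. to reach a Scala class by its fully qualified name. A hypothetical usage, reusing the class path from Example #1:

jvm = spark_jvm()
q1 = jvm.com.nvidia.spark.rapids.tests.tpch.Q1Like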