Exemplo n.º 1
0
def with_gpu_session(func, conf={}):
    """
    Run ``func`` (a callable that takes a spark session) via
    ``with_spark_session`` using GPU-oriented settings layered over ``conf``.

    The incoming ``conf`` is copied first, so the caller's mapping (and the
    shared default) is never modified.  On the copy:

    * ``spark.rapids.sql.enabled`` is always set to ``'true'``.
    * ``spark.rapids.sql.test.enabled`` is set to ``'true'`` and
      ``spark.rapids.sql.test.allowedNonGpu`` to the comma-joined result of
      ``get_non_gpu_allowed()`` -- unless ``is_allowing_any_non_gpu()`` is
      truthy, in which case test mode is set to ``'false'`` instead.

    Returns whatever ``with_spark_session`` returns.
    """
    gpu_conf = dict(conf)
    gpu_conf['spark.rapids.sql.enabled'] = 'true'

    if not is_allowing_any_non_gpu():
        # Test mode on: only the explicitly allowed operators may stay off the GPU.
        gpu_conf['spark.rapids.sql.test.enabled'] = 'true'
        allowed_non_gpu = get_non_gpu_allowed()
        gpu_conf['spark.rapids.sql.test.allowedNonGpu'] = ','.join(allowed_non_gpu)
    else:
        gpu_conf['spark.rapids.sql.test.enabled'] = 'false'

    return with_spark_session(func, conf=gpu_conf)
Exemplo n.º 2
0
def with_gpu_session(func, conf={}):
    """
    Run ``func`` (a callable that takes a spark session) via
    ``with_spark_session`` using GPU-oriented settings layered over ``conf``.

    The incoming ``conf`` is copied first, so the caller's mapping (and the
    shared default) is never modified.  On the copy:

    * ``spark.rapids.sql.enabled`` is always set to ``'true'``.
    * ``spark.rapids.sql.test.enabled`` is set to ``'true'`` and
      ``spark.rapids.sql.test.allowedNonGpu`` to the comma-joined result of
      ``get_non_gpu_allowed()`` -- unless ``is_allowing_any_non_gpu()`` is
      truthy, in which case test mode is set to ``'false'`` instead.
    * ``spark.rapids.sql.test.validateExecsInGpuPlan`` is set to the
      comma-joined result of ``get_validate_execs_in_gpu_plan()``.
    * ``spark.rapids.sql.decimalType.enabled`` is set to ``'true'``.

    Returns whatever ``with_spark_session`` returns.
    """
    gpu_conf = dict(conf)
    gpu_conf['spark.rapids.sql.enabled'] = 'true'

    if not is_allowing_any_non_gpu():
        # Test mode on: only the explicitly allowed operators may stay off the GPU.
        gpu_conf['spark.rapids.sql.test.enabled'] = 'true'
        allowed_non_gpu = get_non_gpu_allowed()
        gpu_conf['spark.rapids.sql.test.allowedNonGpu'] = ','.join(allowed_non_gpu)
    else:
        gpu_conf['spark.rapids.sql.test.enabled'] = 'false'

    execs_to_validate = get_validate_execs_in_gpu_plan()
    gpu_conf['spark.rapids.sql.test.validateExecsInGpuPlan'] = ','.join(execs_to_validate)
    # TODO: remove when decimal types can be enabled by default
    gpu_conf['spark.rapids.sql.decimalType.enabled'] = 'true'

    return with_spark_session(func, conf=gpu_conf)
Exemplo n.º 3
0
def test_csv_fallback(spark_tmp_path, read_func, disable_conf):
    """
    Write generated CSV data with a CPU session, then read it back with the
    ``disable_conf`` setting forced to ``'false'`` and check the result through
    ``assert_gpu_fallback_collect``.

    The expected CPU fallback class is the first entry returned by
    ``get_non_gpu_allowed()``.  The query selects every column plus the sum of
    ``_c2`` and ``_c3``.
    """
    column_gens = [
        StringGen('(\\w| |\t|\ud720){0,10}', nullable=False),
        byte_gen,
        short_gen,
        int_gen,
        long_gen,
        boolean_gen,
        date_gen,
    ]

    # Columns are named _c0, _c1, ... in generator order.
    named_gens = [('_c{}'.format(idx), col_gen)
                  for idx, col_gen in enumerate(column_gens)]
    struct_gen = StructGen(named_gens, nullable=False)
    data_path = spark_tmp_path + '/CSV_DATA'
    schema = struct_gen.data_type

    # Start from the shared "all types" conf and switch off the setting under test.
    fallback_conf = _enable_all_types_conf.copy()
    fallback_conf[disable_conf] = 'false'

    reader = read_func(data_path, schema)

    def write_csv(spark):
        return gen_df(spark, struct_gen).write.csv(data_path)

    def read_and_add(spark):
        frame = reader(spark)
        return frame.select(f.col('*'), f.col('_c2') + f.col('_c3'))

    with_cpu_session(write_csv)
    assert_gpu_fallback_collect(
        read_and_add,
        # TODO add support for lists
        cpu_fallback_class_name=get_non_gpu_allowed()[0],
        conf=fallback_conf)