示例#1
0
def _training_data_to_numpy(X, y):
    """Return the training features and labels as host-side numpy data.

    Only the type of ``X`` is inspected; ``y`` is converted with the
    method that matches the branch selected by ``X``.

    Parameters
    ----------
    X :
        Training features. Supported containers are ``np.ndarray``,
        ``cudf.DataFrame``, CUDA device arrays and pandas
        ``DataFrame``/``Series``.
    y :
        Training labels, expected to live in the container flavour
        that goes with ``X``.

    Returns
    -------
    tuple
        ``(X_np, y_np)``

    Raises
    ------
    TypeError
        If ``X`` is none of the supported container types.
    """
    # numpy input needs no conversion: both objects are returned as-is
    if isinstance(X, np.ndarray):
        return X, y

    # cuDF: go through the GPU matrix/array views, then copy to host
    if isinstance(X, cudf.DataFrame):
        features = X.as_gpu_matrix().copy_to_host()
        labels = y.to_gpu_array().copy_to_host()
        return features, labels

    # CUDA device arrays expose copy_to_host() directly
    if cuda.devicearray.is_cuda_ndarray(X):
        features = X.copy_to_host()
        labels = y.copy_to_host()
        return features, labels

    # pandas containers are delegated to the datagen helper
    if isinstance(X, (pd.DataFrame, pd.Series)):
        features = datagen._convert_to_numpy(X)
        labels = datagen._convert_to_numpy(y)
        return features, labels

    raise TypeError("Received unsupported input type")
示例#2
0
def _benchmark_algo(
    benchmarker,
    algo_name,
    bench_step,
    dataset,
    setup_kwargs=None,
    training_kwargs=None,
    inference_kwargs=None,
    client=None
):
    """
    Benchmark utility

    Runs the cuML benchmark of one algorithm for one step. When no Dask
    client is given (single-GPU mode) it additionally runs the CPU
    benchmark, if the algorithm has a CPU class, and the NVTX profiling,
    if the benchmarked step is 'inference'.

    Parameters
    ----------
    benchmarker :
       Pytest benchmark function, allows to enclose the code
       that should be benchmarked
    algo_name :
       Algorithm/model name, can be found in the algorithms.py file
    bench_step :
        Either 'training' or 'inference', describe the algorithm/model
        step to be benchmarked
    dataset :
        Tuple with the data and a dictionary that describes how it was built.
        The dictionary can be later used during the NVTX benchmark.
    setup_kwargs :
        Algorithm/model setup kwargs (defaults to an empty dict).
        The mapping passed in is never modified: a shallow copy is used
        internally.
    training_kwargs :
        Algorithm/model training kwargs (defaults to an empty dict)
    inference_kwargs :
        Algorithm/model inference kwargs (defaults to an empty dict)
    client :
        Dask client used in MNMG settings
    """

    # Build fresh containers on every call. A shared default dict (or the
    # caller's own dict) must not keep the 'client' entry added below,
    # otherwise it would leak into later single-GPU benchmarks.
    setup_kwargs = {} if setup_kwargs is None else dict(setup_kwargs)
    if training_kwargs is None:
        training_kwargs = {}
    if inference_kwargs is None:
        inference_kwargs = {}

    # Get data and dict describing how it was built
    dataset, data_kwargs = dataset

    # The presence of a Dask client signifies MNMG mode
    MNMG_mode = client is not None

    # Distribute data in MNMG settings
    if MNMG_mode:
        # Add the client to the setup kwargs used by model instantiation
        setup_kwargs['client'] = client
        # Exception : data is scattered by the MNMG DBSCAN model itself
        if algo_name != 'MNMG.DBSCAN':
            # Distribute data
            dataset = [distribute(client, d) for d in dataset]

    # Search AlgorithmPair instance by name
    algo = algorithms.algorithm_by_name(algo_name)
    # Setup the AlgorithmPair and the model to be ready for benchmark on GPU
    cuml_setup = setup_bench('cuml', algo, bench_step, dataset,
                             setup_kwargs, training_kwargs)

    # Pytest benchmark
    if bench_step == 'training':
        benchmarker(algo.run_cuml, dataset, bench_args=training_kwargs,
                    **cuml_setup)
    elif bench_step == 'inference':
        benchmarker(algo.run_cuml, dataset, bench_args=inference_kwargs,
                    **cuml_setup)

    # CPU benchmark and NVTX benchmark (only in SG mode)
    if not MNMG_mode:
        # Check that the cuML model has a CPU equivalency
        if algo.cpu_class:
            # Convert dataset to a Numpy array
            cpu_dataset = datagen._convert_to_numpy(dataset)
            # Setup the AlgorithmPair and the model
            # to be ready for benchmark on CPU
            cpu_setup = setup_bench('cpu', algo, bench_step, cpu_dataset,
                                    setup_kwargs, training_kwargs)
            # CPU benchmark
            cpu_bench(algo, bench_step, cpu_dataset, inference_kwargs,
                      cpu_setup)

        # NVTX benchmark performs both the training and inference at once
        # but only when bench_step == 'inference'
        if bench_step == 'inference':
            # NVTX benchmark
            nvtx_profiling(algo_name, data_kwargs, setup_kwargs,
                           training_kwargs, inference_kwargs)