def _training_data_to_numpy(X, y):
    """
    Return host-side (NumPy) versions of a training set.

    The container type of ``X`` alone selects the conversion path; ``y`` is
    converted with the matching method and is not type-checked itself.

    Parameters
    ----------
    X : Training samples. Handled types, tested in this order: numpy
        ndarray, cudf DataFrame, CUDA device array (as recognised by
        ``cuda.devicearray.is_cuda_ndarray``), pandas DataFrame or Series.
    y : Training targets, expected to be the counterpart of ``X``
        in the same family of containers.

    Returns
    -------
    Tuple ``(X_np, y_np)``.

    Raises
    ------
    TypeError
        If ``X`` is none of the handled types.
    """
    # Already NumPy: hand both objects back untouched (no copy is made)
    if isinstance(X, np.ndarray):
        return X, y

    # cuDF: go through the GPU matrix/array views, then copy to host
    if isinstance(X, cudf.DataFrame):
        features = X.as_gpu_matrix().copy_to_host()
        targets = y.to_gpu_array().copy_to_host()
        return features, targets

    # Raw device arrays only need the device-to-host copy
    if cuda.devicearray.is_cuda_ndarray(X):
        features = X.copy_to_host()
        targets = y.copy_to_host()
        return features, targets

    # pandas containers are delegated to the data generation helper
    if isinstance(X, (pd.DataFrame, pd.Series)):
        features = datagen._convert_to_numpy(X)
        targets = datagen._convert_to_numpy(y)
        return features, targets

    raise TypeError("Received unsupported input type")
def _benchmark_algo(
    benchmarker,
    algo_name,
    bench_step,
    dataset,
    setup_kwargs=None,
    training_kwargs=None,
    inference_kwargs=None,
    client=None
):
    """
    Benchmark utility

    Parameters
    ----------
    benchmarker :
       Pytest benchmark function, allows to enclose the code
       that should be benchmarked
    algo_name :
        Algorithm/model name, can be found in the algorithms.py file
    bench_step :
        Either 'training' or 'inference', describe the algorithm/model
        step to be benchmarked
    dataset :
        Tuple with the data and a dictionary that describes how it was built.
        The dictionary can be later used during the NVTX benchmark.
    setup_kwargs :
        Algorithm/model setup kwargs (defaults to an empty dict).
        The caller's dictionary is never modified.
    training_kwargs :
        Algorithm/model training kwargs (defaults to an empty dict)
    inference_kwargs :
        Algorithm/model inference kwargs (defaults to an empty dict)
    client :
        Dask client used in MNMG settings
    """
    # Fresh dictionaries per call: a shared mutable default would carry
    # state (notably the Dask client) from one benchmark to the next
    if setup_kwargs is None:
        setup_kwargs = {}
    if training_kwargs is None:
        training_kwargs = {}
    if inference_kwargs is None:
        inference_kwargs = {}

    # Get data and dict describing how it was built
    dataset, data_kwargs = dataset

    # The presence of a Dask client signifies MNMG mode
    MNMG_mode = client is not None

    # Distribute data in MNMG settings
    if MNMG_mode:
        # Add the client to the setup kwargs used by model instantiation.
        # A new dict is built so the caller's kwargs are left untouched.
        setup_kwargs = {**setup_kwargs, 'client': client}
        # Exception : data is scattered by the MNMG DBSCAN model itself
        if algo_name != 'MNMG.DBSCAN':
            # Distribute data
            dataset = [distribute(client, d) for d in dataset]

    # Search AlgorithmPair instance by name
    algo = algorithms.algorithm_by_name(algo_name)
    # Setup the AlgorithmPair and the model to be ready for benchmark on GPU
    cuml_setup = setup_bench('cuml', algo, bench_step, dataset,
                             setup_kwargs, training_kwargs)

    # Pytest benchmark
    if bench_step == 'training':
        benchmarker(algo.run_cuml, dataset, bench_args=training_kwargs,
                    **cuml_setup)
    elif bench_step == 'inference':
        benchmarker(algo.run_cuml, dataset, bench_args=inference_kwargs,
                    **cuml_setup)

    # CPU benchmark and NVTX benchmark (only in SG mode)
    if not MNMG_mode:
        # Check that the cuML model has a CPU equivalency
        if algo.cpu_class:
            # Convert dataset to a Numpy array
            cpu_dataset = datagen._convert_to_numpy(dataset)
            # Setup the AlgorithmPair and the model
            # to be ready for benchmark on CPU
            cpu_setup = setup_bench('cpu', algo, bench_step, cpu_dataset,
                                    setup_kwargs, training_kwargs)
            # CPU benchmark
            cpu_bench(algo, bench_step, cpu_dataset, inference_kwargs,
                      cpu_setup)

        # NVTX benchmark performs both the training and inference at once
        # but only when bench_step == 'inference'
        if bench_step == 'inference':
            # NVTX benchmark
            nvtx_profiling(algo_name, data_kwargs, setup_kwargs,
                           training_kwargs, inference_kwargs)