def _benchmark_algo(
    benchmark,
    name,
    dataset_name,
    n_samples=10000,
    n_features=100,
    input_type='numpy',
    data_kwargs={},
    algo_args={},
):
    """Simplest benchmark wrapper to time algorithm 'name' on dataset
    'dataset_name'"""
    algo = algorithms.algorithm_by_name(name)
    data = datagen.gen_data(
        dataset_name,
        input_type,
        n_samples=n_samples,
        n_features=n_features,
        **data_kwargs
    )

    def _benchmark_inner():
        algo.run_cuml(data, **algo_args)

    benchmark(_benchmark_inner)
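# Usage sketch (hypothetical, not part of the original suite): with
# pytest-benchmark, the `benchmark` fixture is injected into the test
# automatically, so a parametrized test can time several algorithms on the
# same synthetic dataset. The algorithm names and sizes below are
# illustrative assumptions.
import pytest


@pytest.mark.parametrize('algo_name', ['KMeans', 'LogisticRegression'])
def test_algo_speed(benchmark, algo_name):
    _benchmark_algo(benchmark, algo_name, 'classification',
                    n_samples=1000, n_features=20)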
def test_fil_input_types(input_type):
    pair = algorithms.algorithm_by_name('FIL')

    if not has_xgboost():
        pytest.xfail(reason="xgboost is not installed")

    runner = AccuracyComparisonRunner(
        [20], [5], dataset_name='classification',
        test_fraction=0.5, input_type=input_type)
    results = runner.run(pair, run_cpu=False)[0]

    assert results["cuml_acc"] is not None
def test_run_variations():
    algo = algorithms.algorithm_by_name("LogisticRegression")
    res = run_variations(
        [algo],
        dataset_name="classification",
        bench_rows=[100, 200],
        bench_dims=[10, 20],
    )

    # The runner builds the cross product of rows and dims: 2 x 2 = 4 runs
    assert res.shape[0] == 4
    assert (res.n_samples == 100).sum() == 2
    assert (res.n_features == 20).sum() == 2
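# Sketch (hypothetical values, assuming param_override_list accepts a list of
# parameter dicts as produced by extract_param_overrides below): the same
# runner can also sweep hyperparameters; each (rows, dims, override)
# combination becomes one row of the result DataFrame.
res_sweep = run_variations(
    [algorithms.algorithm_by_name("LogisticRegression")],
    dataset_name="classification",
    bench_rows=[100],
    bench_dims=[10],
    param_override_list=[{"C": 1.0}, {"C": 10.0}],
)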
def test_real_algos_runner(algo_name):
    pair = algorithms.algorithm_by_name(algo_name)

    if (algo_name == 'UMAP' and not has_umap()) or \
            (algo_name == 'FIL' and not has_xgboost()):
        pytest.xfail(reason="Optional dependency is not installed")

    runner = AccuracyComparisonRunner(
        [20], [5], dataset_name='classification', test_fraction=0.20)
    results = runner.run(pair)[0]
    print(results)

    assert results["cuml_acc"] is not None
algos = algos.union(preprocessing_algo_names)
algos.remove('preprocessing')

invalidAlgoNames = (algos - allAlgoNames)
if invalidAlgoNames:
    raise ValueError("Invalid algo name(s): %s" % invalidAlgoNames)

bench_to_run = bench_config[args.benchmark]

default_args = dict(run_cpu=True, n_reps=args.n_reps)
all_results = []
for cfg_in in bench_to_run:
    if (algos is None) or ("ALL" in algos) or \
            (cfg_in["algo_name"] in algos):
        # Pass an actual algo object instead of an algo_name string
        cfg = cfg_in.copy()
        algo = algorithms.algorithm_by_name(cfg_in["algo_name"])
        cfg["algos"] = [algo]

        # Pick the input type matching the algorithm's sparse prefix,
        # defaulting to dense numpy arrays
        alg_name = cfg["algo_name"]
        if alg_name.startswith('SparseCSR'):
            input_type = 'scipy-sparse-csr'
        elif alg_name.startswith('SparseCSC'):
            input_type = 'scipy-sparse-csc'
        else:
            input_type = 'numpy'

        del cfg["algo_name"]
        res = run_variations(**{**default_args, **cfg},
                             input_type=input_type)
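# A hypothetical bench_config entry consumed by the loop above; everything
# except "algo_name" is forwarded to run_variations():
#
#     bench_config = {
#         "smoke": [
#             {"algo_name": "LogisticRegression",
#              "dataset_name": "classification",
#              "bench_rows": [10000],
#              "bench_dims": [100]},
#         ],
#     }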
if args.num_features > 0:
    bench_dims = [args.num_features]
if args.default_size:
    bench_rows = [0]
    bench_dims = [0]

param_override_list = extract_param_overrides(args.param_sweep)
cuml_param_override_list = extract_param_overrides(args.cuml_param_sweep)
cpu_param_override_list = extract_param_overrides(args.cpu_param_sweep)
dataset_param_override_list = extract_param_overrides(
    args.dataset_param_sweep)

if args.algorithms:
    algos_to_run = []
    for name in args.algorithms:
        algo = algorithms.algorithm_by_name(name)
        if not algo:
            raise ValueError("No algorithm named '%s' found" % name)
        algos_to_run.append(algo)
else:
    # Run all algorithms by default
    algos_to_run = algorithms.all_algorithms()

results_df = runners.run_variations(
    algos_to_run,
    dataset_name=args.dataset,
    bench_rows=bench_rows,
    bench_dims=bench_dims,
    input_type=args.input_type,
    test_fraction=args.test_split,
    param_override_list=param_override_list,
args.dataset_type = params['dataset_type']
if 'n_samples' in params:
    args.n_samples = params['n_samples']
if 'n_features' in params:
    args.n_features = params['n_features']
if 'dataset_format' in params:
    args.dataset_format = params['dataset_format']
if 'data_kwargs' in params:
    args.data_kwargs = params['data_kwargs']
if 'setup_kwargs' in params:
    args.setup_kwargs = params['setup_kwargs']
if 'training_kwargs' in params:
    args.training_kwargs = params['training_kwargs']
if 'inference_kwargs' in params:
    args.inference_kwargs = params['inference_kwargs']


if len(args.json):
    parse_json(args)

dataset = datagen.gen_data(args.dataset_type,
                           args.dataset_format,
                           n_samples=args.n_samples,
                           n_features=args.n_features,
                           **args.data_kwargs)

algo = algorithms.algorithm_by_name(args.algo_name)
cuml_setup = setup_bench('cuml', algo, 'inference', dataset,
                         args.setup_kwargs, args.training_kwargs)
algo.run_cuml(dataset, bench_args=args.inference_kwargs, **cuml_setup)
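# A hypothetical example of the JSON parameter file consumed above; the keys
# mirror the params checks, and any key that is omitted keeps the
# corresponding CLI default:
#
#     {
#         "dataset_type": "classification",
#         "n_samples": 10000,
#         "n_features": 100,
#         "dataset_format": "numpy",
#         "data_kwargs": {},
#         "training_kwargs": {},
#         "inference_kwargs": {}
#     }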
def _benchmark_algo(
    benchmarker,
    algo_name,
    bench_step,
    dataset,
    setup_kwargs={},
    training_kwargs={},
    inference_kwargs={},
    client=None
):
    """
    Benchmark utility

    Parameters
    ----------
    benchmarker :
        Pytest benchmark function used to wrap the code that
        should be benchmarked
    algo_name :
        Algorithm/model name; can be found in the algorithms.py file
    bench_step :
        Either 'training' or 'inference', the algorithm/model step
        to be benchmarked
    dataset :
        Tuple with the data and a dictionary that describes how it was
        built. The dictionary can be used later during the NVTX benchmark.
    setup_kwargs :
        Algorithm/model setup kwargs
    training_kwargs :
        Algorithm/model training kwargs
    inference_kwargs :
        Algorithm/model inference kwargs
    client :
        Dask client used in MNMG settings
    """
    # Get data and the dict describing how it was built
    dataset, data_kwargs = dataset

    # The presence of a Dask client signifies MNMG mode
    MNMG_mode = client is not None

    # Distribute data in MNMG settings
    if MNMG_mode:
        # Add the client to the setup kwargs used by model instantiation
        setup_kwargs['client'] = client
        # Exception: data is scattered by the MNMG DBSCAN model itself
        if algo_name != 'MNMG.DBSCAN':
            # Distribute data
            dataset = [distribute(client, d) for d in dataset]

    # Search AlgorithmPair instance by name
    algo = algorithms.algorithm_by_name(algo_name)

    # Set up the AlgorithmPair and the model to be ready for benchmark on GPU
    cuml_setup = setup_bench('cuml', algo, bench_step, dataset,
                             setup_kwargs, training_kwargs)

    # Pytest benchmark
    if bench_step == 'training':
        benchmarker(algo.run_cuml, dataset, bench_args=training_kwargs,
                    **cuml_setup)
    elif bench_step == 'inference':
        benchmarker(algo.run_cuml, dataset, bench_args=inference_kwargs,
                    **cuml_setup)

    # CPU benchmark and NVTX benchmark (only in SG mode)
    if not MNMG_mode:
        # Check that the cuML model has a CPU equivalent
        if algo.cpu_class:
            # Convert dataset to a NumPy array
            cpu_dataset = datagen._convert_to_numpy(dataset)
            # Set up the AlgorithmPair and the model
            # to be ready for benchmark on CPU
            cpu_setup = setup_bench('cpu', algo, bench_step, cpu_dataset,
                                    setup_kwargs, training_kwargs)
            # CPU benchmark
            cpu_bench(algo, bench_step, cpu_dataset, inference_kwargs,
                      cpu_setup)

        # NVTX benchmark performs both the training and inference at once,
        # but only when bench_step == 'inference'
        if bench_step == 'inference':
            # NVTX benchmark
            nvtx_profiling(algo_name, data_kwargs, setup_kwargs,
                           training_kwargs, inference_kwargs)
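# Usage sketch (hypothetical): wired into pytest-benchmark, assuming a
# `classification_dataset` fixture that yields the (data, data_kwargs) tuple
# expected by _benchmark_algo; `benchmark` is the pytest-benchmark fixture.
def test_kmeans_training(benchmark, classification_dataset):
    _benchmark_algo(benchmark, 'KMeans', 'training', classification_dataset)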