def main():
    # experiment settings
    nruns = args.nruns if args.benchmark else 1
    batch_sizes = [int(elem) for elem in args.batch]
    # load data and instances to be explained
    data = load_data()
    predictor = load_model('assets/predictor.pkl')  # download if not available locally
    y_test, X_test_proc = data['all']['y']['test'], data['all']['X']['processed']['test']
    logging.info(f"Test accuracy: {accuracy_score(y_test, predictor.predict(X_test_proc))}")
    X_explain = data['all']['X']['processed']['test'].toarray()  # instances to be explained
    if args.workers == -1:  # sequential benchmark
        logging.info("Running sequential benchmark without ray ...")
        distributed_opts = {'batch_size': None, 'n_cpus': None, 'actor_cpu_fraction': 1.0}
        explainer = fit_kernel_shap_explainer(predictor, data, distributed_opts=distributed_opts)
        run_explainer(explainer, X_explain, distributed_opts, nruns)
    # run distributed benchmark or simply explain on a number of cores, depending on args.benchmark value
    else:
        workers_range = range(1, args.workers + 1) if args.benchmark == 1 else range(args.workers, args.workers + 1)
        for workers in workers_range:
            for batch_size in batch_sizes:
                logging.info(f"Running experiment using {workers} actors with batch size {batch_size} ...")
                distributed_opts = {'batch_size': int(batch_size), 'n_cpus': workers, 'actor_cpu_fraction': 1.0}
                explainer = fit_kernel_shap_explainer(predictor, data, distributed_opts)
                run_explainer(explainer, X_explain, distributed_opts, nruns)
                ray.shutdown()  # shut ray down so it can be re-initialised with the next setting
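# The module-level `args` object used above is not shown in this excerpt. A
# minimal sketch of a CLI parser that supplies the attributes the code reads
# (`nruns`, `batch`, `workers`, `benchmark`); the flag names and defaults are
# assumptions, not taken from the source:
import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--nruns', type=int, default=1, help='repetitions per setting (hypothetical default)')
parser.add_argument('--batch', nargs='+', default=['10'], help='batch sizes to benchmark')
parser.add_argument('--workers', type=int, default=-1, help='number of Ray actor cores; -1 runs sequentially')
parser.add_argument('--benchmark', type=int, default=0, help='1 sweeps 1..workers actors; 0 uses exactly `workers`')
args = parser.parse_args()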
def main():
    # initialise ray
    ray.init(address='auto')
    # experiment settings
    nruns = args.nruns
    batch_sizes = [int(elem) for elem in args.batch]
    # load data and instances to be explained
    data = load_data()
    predictor = load_model('assets/predictor.pkl')  # download if not available locally
    y_test, X_test_proc = data['all']['y']['test'], data['all']['X']['processed']['test']
    logging.info(f"Test accuracy: {accuracy_score(y_test, predictor.predict(X_test_proc))}")
    X_explain = data['all']['X']['processed']['test'].toarray()  # instances to be explained
    distributed_opts = {'n_cpus': args.workers}
    explainer = fit_kernel_shap_explainer(predictor, data, distributed_opts)
    for batch_size in batch_sizes:
        logging.info(f"Running experiment using {args.workers} actors with batch size {batch_size} ...")
        run_explainer(explainer, X_explain, distributed_opts, nruns, batch_size)
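# `ray.init(address='auto')` above attaches to an already-running Ray cluster
# (e.g. one started with `ray start --head`) and raises if none is found. A
# hedged sketch of a fallback that starts a local single-node instance instead:
import ray

if not ray.is_initialized():
    try:
        ray.init(address='auto')  # attach to an existing cluster
    except ConnectionError:
        ray.init()  # no cluster found: start a local Ray instance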
def prepare_explainer_args(data: Dict[str, Any]) -> Tuple[Any, np.ndarray, dict, dict]:
    """
    Extracts the names of the features (group_names) and the columns corresponding to each feature in the feature
    matrix (groups) from the `data` dict and defines the explainer arguments. The background data necessary to
    initialise the explainer is also extracted from the same dictionary.

    Parameters
    ----------
    data
        A dictionary that contains all the information necessary to initialise the explainer.

    Returns
    -------
    A tuple containing the positional and keyword arguments necessary for initialising the explainers.
    """
    groups = data['all']['groups']
    group_names = data['all']['group_names']
    background_data = data['background']['X']['preprocessed']
    assert background_data.shape[0] == 100  # the background dataset is expected to contain 100 samples
    init_kwargs = {'link': 'logit', 'feature_names': group_names, 'seed': 0}
    fit_kwargs = {'groups': groups, 'group_names': group_names}
    predictor = load_model(PREDICTOR_URL)
    worker_args = (predictor, background_data, init_kwargs, fit_kwargs)

    return worker_args
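# A hedged usage sketch of the tuple returned by prepare_explainer_args,
# assuming the explainer is alibi's KernelShap (the consuming code is not
# shown in this excerpt):
from alibi.explainers import KernelShap

predictor, background_data, init_kwargs, fit_kwargs = prepare_explainer_args(data)
explainer = KernelShap(predictor, **init_kwargs)  # link='logit', feature_names=..., seed=0
explainer.fit(background_data, **fit_kwargs)      # groups=..., group_names=...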
def main(path_to_X_explain, path_to_training_data):
    nruns = args.nruns if args.benchmark else 1
    batch_sizes = [int(elem) for elem in args.batch]
    # load data and instances to be explained
    predictor = load_model('assets/predictor.pkl')  # download if not available locally
    data = np.array(read_file(path_to_training_data)).astype(np.float64)
    X_explain = np.array(read_file(path_to_X_explain)).astype(np.float64)  # instances to be explained
    if args.workers == -1:  # sequential benchmark
        logging.info("Running sequential benchmark without ray ...")
        distributed_opts = {'batch_size': None, 'n_cpus': None, 'actor_cpu_fraction': 1.0}
        explainer = fit_kernel_shap_explainer(predictor, data, distributed_opts=distributed_opts)
        run_explainer(explainer, X_explain, distributed_opts, nruns)
    # run distributed benchmark or simply explain on a number of cores, depending on args.benchmark value
    else:
        workers_range = range(1, args.workers + 1) if args.benchmark == 1 else range(args.workers, args.workers + 1)
        for workers in workers_range:
            for batch_size in batch_sizes:
                logging.info(f"Running experiment using {workers} actors with batch size {batch_size} ...")
                distributed_opts = {'batch_size': int(batch_size), 'n_cpus': workers, 'actor_cpu_fraction': 1.0}
                explainer = fit_kernel_shap_explainer(predictor, data, distributed_opts)
                run_explainer(explainer, X_explain, distributed_opts, nruns)
                ray.shutdown()  # shut ray down so it can be re-initialised with the next setting
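# `read_file` is not defined in this excerpt. A minimal sketch, assuming the
# inputs are pickled array-like objects on disk (the actual on-disk format
# used by the benchmark may differ):
import pickle

def read_file(path: str):
    with open(path, 'rb') as f:
        return pickle.load(f)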