Example #1
def main():

    # experiment settings
    nruns = args.nruns if args.benchmark else 1
    batch_sizes = [int(elem) for elem in args.batch]

    # load data and instances to be explained
    data = load_data()
    predictor = load_model('assets/predictor.pkl')  # download if not available locally
    y_test, X_test_proc = data['all']['y']['test'], data['all']['X']['processed']['test']
    logging.info(f"Test accuracy: {accuracy_score(y_test, predictor.predict(X_test_proc))}")
    X_explain = data['all']['X']['processed']['test'].toarray()  # instances to be explained

    if args.workers == -1:  # sequential benchmark
        logging.info(f"Running sequential benchmark without ray ...")
        distributed_opts = {'batch_size': None, 'n_cpus': None, 'actor_cpu_fraction': 1.0}
        explainer = fit_kernel_shap_explainer(predictor, data, distributed_opts=distributed_opts)
        run_explainer(explainer, X_explain, distributed_opts, nruns)
    # run a distributed benchmark or a single explanation on a fixed number of cores, depending on args.benchmark
    else:
        workers_range = range(1, args.workers + 1) if args.benchmark == 1 else range(args.workers, args.workers + 1)
        for workers in workers_range:
            for batch_size in batch_sizes:
                logging.info(f"Running experiment using {workers} actors...")
                logging.info(f"Running experiment with batch size {batch_size}")
                distributed_opts = {'batch_size': int(batch_size), 'n_cpus': workers, 'actor_cpu_fraction': 1.0}
                explainer = fit_kernel_shap_explainer(predictor, data, distributed_opts)
                run_explainer(explainer, X_explain, distributed_opts, nruns)
                ray.shutdown()  # tear down ray between settings so each run starts from a fresh cluster
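All four examples read their settings from a module-level `args` object that is not shown. A minimal sketch of the argument parser they appear to assume, with flag names inferred from the attribute accesses above (defaults are illustrative, not taken from the source):

import argparse

parser = argparse.ArgumentParser(description="Distributed KernelSHAP experiments")
# number of repetitions per setting; only honoured when benchmarking
parser.add_argument('--nruns', type=int, default=5)
# one or more batch sizes used to split X_explain across actors
parser.add_argument('--batch', nargs='+', default=['10'])
# number of ray actors; -1 selects the sequential (no ray) baseline
parser.add_argument('--workers', type=int, default=-1)
# 1 sweeps 1..workers actors, 0 runs a single configuration
parser.add_argument('--benchmark', type=int, default=0)
args = parser.parse_args()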
Example #2
def main():

    # connect to an existing ray cluster (address='auto' requires one to be running)
    ray.init(address='auto')

    # experiment settings
    nruns = args.nruns
    batch_sizes = [int(elem) for elem in args.batch]

    # load data and instances to be explained
    data = load_data()
    predictor = load_model('assets/predictor.pkl')  # download if not available locally
    y_test, X_test_proc = data['all']['y']['test'], data['all']['X']['processed']['test']
    logging.info(f"Test accuracy: {accuracy_score(y_test, predictor.predict(X_test_proc))}")
    X_explain = data['all']['X']['processed']['test'].toarray()  # instances to be explained

    distributed_opts = {'n_cpus': args.workers}
    explainer = fit_kernel_shap_explainer(predictor, data, distributed_opts)
    for batch_size in batch_sizes:
        logging.info(f"Running experiment using {args.workers} actors...")
        logging.info(f"Batch size: {batch_size}")
        run_explainer(explainer, X_explain, distributed_opts, nruns, batch_size)
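`fit_kernel_shap_explainer` is not shown in any of the examples. A plausible sketch, assuming alibi's `KernelShap` API and the data layout used in Example #3 below; the helper name, the `predict_proba` handle and the exact keyword set are assumptions inferred from the call sites:

from alibi.explainers import KernelShap

def fit_kernel_shap_explainer(predictor, data, distributed_opts=None):
    # map one-hot encoded columns back to the original features
    groups, group_names = data['all']['groups'], data['all']['group_names']
    background_data = data['background']['X']['preprocessed']
    explainer = KernelShap(
        predictor.predict_proba,  # assumed probability handle, consistent with link='logit'
        link='logit',
        feature_names=group_names,
        seed=0,
        distributed_opts=distributed_opts,  # {'n_cpus', 'batch_size', 'actor_cpu_fraction'} or None
    )
    explainer.fit(background_data, group_names=group_names, groups=groups)
    return explainer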
Example #3
def prepare_explainer_args(data: Dict[str, Any]) -> Tuple[Any, np.ndarray, dict, dict]:
    """
    Extracts the feature names (group_names) and the columns corresponding to each feature in the feature matrix
    (groups) from the `data` dict and defines the explainer arguments. The background data necessary to initialise
    the explainer is also extracted from the same dictionary.

    Parameters
    ----------
    data
        A dictionary that contains all information necessary to initialise the explainer.

    Returns
    -------
    A tuple containing the positional and keyword arguments necessary for initialising and fitting the explainer.
    """

    groups = data['all']['groups']
    group_names = data['all']['group_names']
    background_data = data['background']['X']['preprocessed']
    assert background_data.shape[0] == 100  # sanity check: the background dataset is expected to hold 100 samples
    init_kwargs = {'link': 'logit', 'feature_names': group_names, 'seed': 0}
    fit_kwargs = {'groups': groups, 'group_names': group_names}
    predictor = load_model(PREDICTOR_URL)
    worker_args = (predictor, background_data, init_kwargs, fit_kwargs)

    return worker_args
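For context, this is how the returned tuple might be consumed inside a worker, assuming the same alibi `KernelShap` API as in the sketch after Example #2 (hypothetical usage, not from the source):

predictor, background_data, init_kwargs, fit_kwargs = prepare_explainer_args(data)
explainer = KernelShap(predictor.predict_proba, **init_kwargs)
explainer.fit(background_data, **fit_kwargs)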
Example #4
def main(path_to_X_explain, path_to_training_data):
    nruns = args.nruns if args.benchmark else 1

    batch_sizes = [int(elem) for elem in args.batch]

    # load data and instances to be explained
    predictor = load_model('assets/predictor.pkl')  # download if not available locally
    X_explain = read_file(path_to_X_explain)
    data = np.array(read_file(path_to_training_data)).astype(np.float64)

    X_explain = np.array(X_explain).astype(np.float64)  # instances to be explained

    if args.workers == -1:  # sequential benchmark
        logging.info(f"Running sequential benchmark without ray ...")
        distributed_opts = {
            'batch_size': None,
            'n_cpus': None,
            'actor_cpu_fraction': 1.0
        }
        explainer = fit_kernel_shap_explainer(
            predictor, data, distributed_opts=distributed_opts)
        run_explainer(explainer, X_explain, distributed_opts, nruns)
    # run a distributed benchmark or a single explanation on a fixed number of cores, depending on args.benchmark
    else:
        workers_range = range(1, args.workers + 1) if args.benchmark == 1 else range(args.workers, args.workers + 1)
        for workers in workers_range:
            for batch_size in batch_sizes:
                logging.info(f"Running experiment using {workers} actors...")
                logging.info(f"Running experiment with batch size {batch_size}")
                distributed_opts = {'batch_size': int(batch_size), 'n_cpus': workers, 'actor_cpu_fraction': 1.0}
                explainer = fit_kernel_shap_explainer(predictor, data, distributed_opts)
                run_explainer(explainer, X_explain, distributed_opts, nruns)
                ray.shutdown()  # tear down ray between settings so each run starts from a fresh cluster
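`run_explainer` is also elided. A minimal timing-loop sketch consistent with how Examples #1 and #4 call it (Example #2 passes `batch_size` as an extra argument instead of inside `distributed_opts`); the wall-clock timing and the log format are assumptions:

import logging
import time

def run_explainer(explainer, X_explain, distributed_opts, nruns):
    # explain the same batch of instances nruns times and report wall-clock timings
    times = []
    for run in range(nruns):
        t_start = time.perf_counter()
        explainer.explain(X_explain)
        times.append(time.perf_counter() - t_start)
        logging.info(f"Run {run}: explained {X_explain.shape[0]} instances in {times[-1]:.2f}s")
    logging.info(f"Mean time over {nruns} runs: {sum(times) / len(times):.2f}s")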