Пример #1
0
def test_cs_vae():
    """Fit a CSVAE for one epoch, check the reconstruction shape, then load it."""
    dataset_name = "adult"
    catalog = OnlineCatalog(dataset_name)
    classifier = MLModelCatalog(catalog, "ann", backend="pytorch")

    def build_csvae():
        # Fresh layer list and mutable mask for every instance.
        return CSVAE(
            dataset_name,
            layers=[11, 16, 8],
            mutable_mask=classifier.get_mutable_mask(),
        )

    # One all-zero sample (1 x 13) and a (1 x 2) class tensor.
    test_input = torch.Tensor(np.zeros((1, 13)))
    test_class = torch.Tensor(np.array([[0, 0]]))

    csvae = build_csvae()

    # Train on the model's input features plus the target column.
    training_columns = classifier.feature_input_order + [catalog.target]
    csvae.fit(data=catalog.df[training_columns], epochs=1)

    # The first element of the prediction output is the reconstruction.
    test_reconstructed = csvae.predict(test_input, test_class)[0]
    assert test_reconstructed.shape == test_input.shape

    # A second instance with the same configuration must be loadable.
    reloaded_csvae = build_csvae()
    reloaded_csvae.load(11)
Пример #2
0
def test_variational_autoencoder():
    """Fit a VariationalAutoencoder, check the reconstruction shape, then load it."""
    dataset_name = "adult"
    catalog = OnlineCatalog(dataset_name)
    classifier = MLModelCatalog(catalog, "ann", backend="pytorch")

    def build_vae():
        # Fresh layer list and mutable mask for every instance.
        return VariationalAutoencoder(
            dataset_name,
            layers=[11, 512, 256, 8],
            mutable_mask=classifier.get_mutable_mask(),
        )

    # Place the all-zero (1 x 13) sample on the GPU when one is available.
    if torch.cuda.is_available():
        device = "cuda"
    else:
        device = "cpu"
    test_input = torch.Tensor(np.zeros((1, 13))).to(device)

    vae = build_vae()
    vae.fit(xtrain=catalog.df[classifier.feature_input_order])

    # predict yields three values; only the first (reconstruction) is checked.
    test_reconstructed, _unused_a, _unused_b = vae.predict(test_input)
    assert test_reconstructed.shape == test_input.shape

    # A second instance with the same configuration must be loadable.
    reloaded_vae = build_vae()
    reloaded_vae.load(11)
Пример #3
0
def test_causal_recourse():
    """Train a linear model on generated SCM data and request causal recourse.

    Checks that one counterfactual is returned per factual.
    """
    causal_model = CausalModel("sanity-3-lin")
    dataset = causal_model.generate_dataset(10000)

    classifier = MLModelCatalog(
        dataset, "linear", load_online=False, backend="tensorflow"
    )
    classifier.train(learning_rate=0.8, epochs=10, batch_size=16)

    # Use the first five negatively predicted instances as factuals.
    negatives = predict_negative_instances(classifier, dataset.df)
    factuals = negatives[:5]
    assert len(factuals) > 0

    recourse = CausalRecourse(classifier, {"scm": causal_model})
    counterfactuals = recourse.get_counterfactuals(factuals)

    assert len(counterfactuals) == len(factuals)
Пример #4
0
def test_predictions_pt(model_type, data_name):
    """Check shapes and container types of PyTorch-backend predictions.

    ``predict`` and ``predict_proba`` are each called with a numpy array and
    with a torch tensor, for a single row and for a batch of 22 rows. The
    result is expected to come back in the same container type as the input:
    ``np.ndarray`` for numpy input, a tensor for torch input.

    Parameters
    ----------
    model_type :
        Model type forwarded to ``MLModelCatalog``.
    data_name :
        Dataset name forwarded to ``OnlineCatalog``.
    """
    data = OnlineCatalog(data_name)
    model = MLModelCatalog(data, model_type, backend="pytorch")

    # Row 22 as a (1, n_features) array, plus the same values as a tensor.
    single_sample = data.df.iloc[22]
    single_sample = single_sample[model.feature_input_order].values.reshape((1, -1))
    single_sample_torch = torch.Tensor(single_sample)

    # Rows 0..21 as a (22, n_features) array, plus the same values as a tensor.
    samples = data.df.iloc[0:22]
    samples = samples[model.feature_input_order].values
    samples_torch = torch.Tensor(samples)

    # Test single non probabilistic predictions
    single_prediction = model.predict(single_sample)
    assert single_prediction.shape == (1, 1)
    assert isinstance(single_prediction, np.ndarray)

    single_prediction_torch = model.predict(single_sample_torch)
    assert single_prediction_torch.shape == (1, 1)
    assert torch.is_tensor(single_prediction_torch)

    # bulk non probabilistic predictions
    predictions = model.predict(samples)
    assert predictions.shape == (22, 1)
    assert isinstance(predictions, np.ndarray)

    predictions_torch = model.predict(samples_torch)
    assert predictions_torch.shape == (22, 1)
    assert torch.is_tensor(predictions_torch)

    # Test single probabilistic predictions
    single_predict_proba = model.predict_proba(single_sample)
    assert single_predict_proba.shape == (1, 2)
    assert isinstance(single_predict_proba, np.ndarray)

    single_predict_proba_torch = model.predict_proba(single_sample_torch)
    assert single_predict_proba_torch.shape == (1, 2)
    assert torch.is_tensor(single_predict_proba_torch)

    # bulk probabilistic predictions
    predictions_proba = model.predict_proba(samples)
    assert predictions_proba.shape == (22, 2)
    # Check the bulk result itself; the original re-checked the single-sample
    # result here, leaving the bulk numpy output type unverified.
    assert isinstance(predictions_proba, np.ndarray)

    predictions_proba_torch = model.predict_proba(samples_torch)
    assert predictions_proba_torch.shape == (22, 2)
    assert torch.is_tensor(predictions_proba_torch)
Пример #5
0
def test_properties():
    """Check the backend name and feature order of the tensorflow ANN on adult."""
    catalog = OnlineCatalog("adult")
    ann_model = MLModelCatalog(catalog, "ann", backend="tensorflow")

    # Expected column order of the model input.
    expected_feature_order = [
        "age",
        "fnlwgt",
        "education-num",
        "capital-gain",
        "capital-loss",
        "hours-per-week",
        "workclass_Private",
        "marital-status_Non-Married",
        "occupation_Other",
        "relationship_Non-Husband",
        "race_White",
        "sex_Male",
        "native-country_US",
    ]

    assert ann_model.backend == "tensorflow"
    assert ann_model.feature_input_order == expected_feature_order
Пример #6
0
def test_forest_properties():
    """Check that a sklearn forest model can be built for the adult dataset."""
    catalog = OnlineCatalog("adult")
    forest_model = MLModelCatalog(catalog, "forest", backend="sklearn")

    assert forest_model is not None
Пример #7
0
def test_predictions_with_pipeline(model_type, data_name):
    """Check prediction shapes of a tensorflow model with ``use_pipeline`` enabled.

    Inputs are passed as data frames (one row and a 22-row slice) rather
    than as pre-selected numpy arrays.
    """
    catalog = OnlineCatalog(data_name)

    pipeline_model = MLModelCatalog(catalog, model_type, backend="tensorflow")
    pipeline_model.use_pipeline = True

    # Row 22 turned back into a one-row frame, and rows 0..21 as a batch.
    one_row = catalog.df.iloc[22].to_frame().T
    batch = catalog.df.iloc[0:22]

    # Non probabilistic predictions: single row, then batch.
    single_prediction = pipeline_model.predict(one_row)
    assert single_prediction.shape == (1, 1)

    batch_prediction = pipeline_model.predict(batch)
    assert batch_prediction.shape == (22, 1)

    # Probabilistic predictions: single row, then batch.
    single_proba = pipeline_model.predict_proba(one_row)
    assert single_proba.shape == (1, 2)

    batch_proba = pipeline_model.predict_proba(batch)
    assert batch_proba.shape == (22, 2)
Пример #8
0
def test_mlmodel(model_type):
    """Check the class hierarchy: MLModelCatalog -> MLModel -> ABC."""
    catalog = OnlineCatalog("adult")
    catalog_model = MLModelCatalog(catalog, model_type, backend="tensorflow")

    assert issubclass(MLModelCatalog, MLModel)
    assert isinstance(catalog_model, MLModel)
    assert issubclass(MLModel, ABC)
Пример #9
0
def test_get_tree():
    """Check that the first xgboost tree is extracted as a list of strings."""
    catalog = OnlineCatalog("adult")
    forest_model = MLModelCatalog(catalog, "forest", "xgboost")

    first_booster = forest_model.tree_iterator[0]
    extracted = _get_tree_from_booster(first_booster)

    assert isinstance(extracted, list)
    assert isinstance(extracted[0], str)
Пример #10
0
def test_autoencoder():
    """Train autoencoders with several layer layouts and check the output shape.

    Three configurations are exercised in order: a four-entry layout, a
    two-entry layout, and a six-entry layout with a custom loss function.
    """
    # Build data and mlmodel
    data_name = "adult"
    data = OnlineCatalog(data_name)

    model = MLModelCatalog(data, "ann", backend="tensorflow")
    test_input = tf.Variable(np.zeros((1, 13)), dtype=tf.float32)

    def custom_loss(y_true, y_pred):
        # Largest element of the difference between target and prediction.
        return K.max(y_true - y_pred)

    # (layer sizes after the input layer, extra Autoencoder keyword arguments)
    configurations = [
        ([20, 10, 5], {}),
        ([5], {}),
        ([20, 15, 10, 8, 5], {"loss": custom_loss}),
    ]

    for inner_layers, extra_kwargs in configurations:
        # The input layer size always follows the model's feature count.
        layers = [len(model.feature_input_order)] + inner_layers
        ae = Autoencoder(data_name, layers, **extra_kwargs)
        fitted_ae = train_autoencoder(
            ae,
            data,
            model.feature_input_order,
            epochs=5,
            save=False,
        )

        test_output = fitted_ae(test_input)
        assert test_output.shape == (1, 13)
Пример #11
0
def test_cfmodel():
    """Check the class hierarchy: Dice -> RecourseMethod -> ABC."""
    catalog = OnlineCatalog("adult")
    ann_model = MLModelCatalog(catalog, "ann", backend="tensorflow")

    dice_method = Dice(ann_model, {"num": 1, "desired_class": 1})

    assert issubclass(Dice, RecourseMethod)
    assert isinstance(dice_method, RecourseMethod)
    assert issubclass(RecourseMethod, ABC)
Пример #12
0
def test_predictions_tf(model_type, data_name):
    """Check prediction shapes of a tensorflow-backend model on numpy input.

    A single row and a 22-row batch are passed to both ``predict`` and
    ``predict_proba``.
    """
    catalog = OnlineCatalog(data_name)
    tf_model = MLModelCatalog(catalog, model_type, backend="tensorflow")

    # Row 22 as a (1, n_features) array.
    row = catalog.df.iloc[22]
    one_row = row[tf_model.feature_input_order].values.reshape((1, -1))

    # Rows 0..21 as a (22, n_features) array.
    head = catalog.df.iloc[0:22]
    batch = head[tf_model.feature_input_order].values

    # Non probabilistic predictions: single row, then batch.
    single_prediction = tf_model.predict(one_row)
    assert single_prediction.shape == (1, 1)

    batch_prediction = tf_model.predict(batch)
    assert batch_prediction.shape == (22, 1)

    # Probabilistic predictions: single row, then batch.
    single_proba = tf_model.predict_proba(one_row)
    assert single_proba.shape == (1, 2)

    batch_proba = tf_model.predict_proba(batch)
    assert batch_proba.shape == (22, 2)
Пример #13
0
def test_parse_booster():
    """Check that parsing the first xgboost tree yields five non-empty sequences."""
    catalog = OnlineCatalog("adult")
    forest_model = MLModelCatalog(catalog, "forest", "xgboost")
    first_tree = forest_model.tree_iterator[0]

    # Unpack into exactly five parts, as the parser is expected to return.
    parsed = parse_booster(first_tree)
    children_left, children_right, thresholds, features, scores = parsed

    for part in (children_left, children_right, thresholds, features, scores):
        assert len(part) > 0
Пример #14
0
def make_benchmark(data_name="adult", model_name="ann"):
    """Build a Benchmark for Dice on the first five negatively predicted rows.

    Parameters
    ----------
    data_name :
        Dataset name forwarded to ``OnlineCatalog``.
    model_name :
        Model type forwarded to ``MLModelCatalog`` (tensorflow backend).

    Returns
    -------
    The constructed ``Benchmark`` object.
    """
    catalog = OnlineCatalog(data_name)
    classifier = MLModelCatalog(catalog, model_name, backend="tensorflow")

    # Factuals: the first five instances the model predicts as negative.
    negatives = predict_negative_instances(classifier, catalog.df)
    first_five = negatives.iloc[:5]

    dice_method = Dice(classifier, {"num": 1, "desired_class": 1})

    return Benchmark(classifier, dice_method, first_five)
Пример #15
0
def test_parse_node():
    """Parse one split node and one leaf node of the first xgboost tree.

    For the split node, threshold and feature must be defined, neither child
    may be marked as a leaf, and the score must be ``None``. For the leaf
    node, threshold and feature must be undefined, both children must be
    marked as leaves, and the score must lie in ``[0, 1]``.
    """
    data_name = "adult"
    data = OnlineCatalog(data_name)
    model = MLModelCatalog(data, "forest", "xgboost")
    booster = model.tree_iterator[0]

    tree = _get_tree_from_booster(booster)

    # Pick the first leaf line and the first non-leaf (split) line.
    leaf_str = None
    node_str = None
    for node in tree:
        if "leaf" in node:
            if leaf_str is None:
                leaf_str = node
        elif node_str is None:
            node_str = node
        if leaf_str is not None and node_str is not None:
            # Both kinds found; the rest of the tree is irrelevant.
            break

    # Fail with a clear message instead of handing None to _parse_node.
    assert node_str is not None, "tree contains no split node"
    assert leaf_str is not None, "tree contains no leaf node"

    (
        node_id,
        threshold,
        feature,
        left_child,
        right_child,
        score,
    ) = _parse_node(node_str)

    assert threshold != TREE_UNDEFINED
    assert feature != TREE_UNDEFINED
    assert left_child != TREE_LEAF
    assert right_child != TREE_LEAF
    assert score is None

    (
        node_id,
        threshold,
        feature,
        left_child,
        right_child,
        score,
    ) = _parse_node(leaf_str)

    assert threshold == TREE_UNDEFINED
    assert feature == TREE_UNDEFINED
    assert left_child == TREE_LEAF
    assert right_child == TREE_LEAF
    assert 0 <= score <= 1
Пример #16
0
def test_save_and_load():
    """Train and save an autoencoder, load it back, and compare the outputs."""
    with tf.Session() as sess:
        dataset_name = "adult"
        catalog = OnlineCatalog(dataset_name)

        classifier = MLModelCatalog(catalog, "ann", backend="tensorflow")
        test_input = tf.Variable(np.zeros((1, 13)), dtype=tf.float32)

        # Train with save=True so the fitted autoencoder can be loaded below.
        autoencoder = Autoencoder(
            dataset_name, [len(classifier.feature_input_order), 20, 10, 5]
        )
        fitted = train_autoencoder(
            autoencoder,
            catalog,
            classifier.feature_input_order,
            epochs=5,
            save=True,
        )
        expected = fitted(test_input)

        # Load by input dimension into an instance created without layers.
        loaded = Autoencoder(dataset_name).load(len(classifier.feature_input_order))
        actual = loaded(test_input)

        # Evaluate both tensors in the session and require element-wise equality.
        actual_values = actual.eval(session=sess)
        expected_values = expected.eval(session=sess)
        assert (actual_values == expected_values).all()
Пример #17
0
def _train_model(data, model_type, backend):
    """Build an MLModelCatalog without online loading and force-train it.

    Hyperparameters are looked up in ``training_params`` by model type and
    ``data.name``. Forest models are trained with ``max_depth`` and
    ``n_estimators``; every other model type with learning rate, epochs and
    batch size.

    Returns the trained model.
    """
    model = MLModelCatalog(data, model_type, load_online=False, backend=backend)
    settings = training_params[model_type][data.name]

    # Select the keyword arguments that match the model family.
    if model_type == "forest":
        train_kwargs = {
            "max_depth": settings["max_depth"],
            "n_estimators": settings["n_estimators"],
        }
    else:
        train_kwargs = {
            "learning_rate": settings["lr"],
            "epochs": settings["epochs"],
            "batch_size": settings["batch_size"],
        }

    model.train(force_train=True, **train_kwargs)
    return model
Пример #18
0
    if rm in torch_methods:
        backend = "pytorch"
    for data_name in args.dataset:
        dataset = OnlineCatalog(data_name)
        for model_type in args.type:
            log.info("=====================================")
            log.info("Recourse method: {}".format(rm))
            log.info("Dataset: {}".format(data_name))
            log.info("Model type: {}".format(model_type))

            if rm in session_models:
                graph = Graph()
                with graph.as_default():
                    ann_sess = Session()
                    with ann_sess.as_default():
                        mlmodel_sess = MLModelCatalog(dataset, model_type,
                                                      backend)

                        factuals_sess = predict_negative_instances(
                            mlmodel_sess, dataset)
                        factuals_sess = factuals_sess.iloc[:args.
                                                           number_of_samples]
                        factuals_sess = factuals_sess.reset_index(drop=True)

                        recourse_method_sess = initialize_recourse_method(
                            rm,
                            mlmodel_sess,
                            dataset,
                            data_name,
                            model_type,
                            setup,
                            sess=ann_sess,