def test_cs_vae():
    """Fit a CSVAE on the adult data for one epoch and check the output shape.

    The first element returned by ``predict`` must have the same shape as the
    input sample. Afterwards a second, identically configured CSVAE is built
    and ``load(11)`` is called on it.
    """
    dataset_name = "adult"
    catalog = OnlineCatalog(dataset_name)
    mlmodel = MLModelCatalog(catalog, "ann", backend="pytorch")

    # One all-zero sample with 13 columns and an all-zero class row.
    sample = torch.Tensor(np.zeros((1, 13)))
    sample_class = torch.Tensor(np.array([[0, 0]]))

    def build_csvae():
        # A fresh layer list and mask are created for every instance.
        return CSVAE(
            dataset_name,
            layers=[11, 16, 8],
            mutable_mask=mlmodel.get_mutable_mask(),
        )

    csvae = build_csvae()
    training_columns = mlmodel.feature_input_order + [catalog.target]
    csvae.fit(data=catalog.df[training_columns], epochs=1)

    reconstructed = csvae.predict(sample, sample_class)[0]
    assert reconstructed.shape == sample.shape

    # test loading vae
    restored = build_csvae()
    restored.load(11)
def test_variational_autoencoder():
    """Fit a VariationalAutoencoder on the adult data and check the output shape.

    The first of the three values returned by ``predict`` must match the shape
    of the input sample. Afterwards a second, identically configured model is
    built and ``load(11)`` is called on it.
    """
    dataset_name = "adult"
    catalog = OnlineCatalog(dataset_name)
    mlmodel = MLModelCatalog(catalog, "ann", backend="pytorch")

    # Place the all-zero sample on the GPU when one is available.
    device = "cuda" if torch.cuda.is_available() else "cpu"
    sample = torch.Tensor(np.zeros((1, 13))).to(device)

    def build_vae():
        # A fresh layer list and mask are created for every instance.
        return VariationalAutoencoder(
            dataset_name,
            layers=[11, 512, 256, 8],
            mutable_mask=mlmodel.get_mutable_mask(),
        )

    vae = build_vae()
    vae.fit(xtrain=catalog.df[mlmodel.feature_input_order])

    reconstructed, _, _ = vae.predict(sample)
    assert reconstructed.shape == sample.shape

    # test loading vae
    restored = build_vae()
    restored.load(11)
def test_causal_recourse():
    """Train a linear model on generated SCM data and request counterfactuals.

    Checks that at least one negatively predicted instance exists among the
    first five, and that CausalRecourse returns one counterfactual per factual.
    """
    scm = CausalModel("sanity-3-lin")
    dataset = scm.generate_dataset(10000)

    mlmodel = MLModelCatalog(
        dataset, "linear", load_online=False, backend="tensorflow"
    )
    mlmodel.train(learning_rate=0.8, epochs=10, batch_size=16)

    # get factuals
    negatives = predict_negative_instances(mlmodel, dataset.df)[:5]
    assert len(negatives) > 0

    recourse = CausalRecourse(mlmodel, {"scm": scm})
    counterfactuals = recourse.get_counterfactuals(negatives)
    assert len(counterfactuals) == len(negatives)
def test_predictions_pt(model_type, data_name):
    """Check output shape and container type of the PyTorch-backend predictions.

    For both ``predict`` and ``predict_proba``, a single row (index 22) and a
    batch of 22 rows are passed once as a numpy array and once as a torch
    tensor. A numpy input must yield a ``np.ndarray`` and a tensor input must
    yield a torch tensor, each with the expected shape.

    Parameters
    ----------
    model_type :
        Model identifier forwarded to ``MLModelCatalog``.
    data_name :
        Dataset identifier forwarded to ``OnlineCatalog``.
    """
    data = OnlineCatalog(data_name)
    model = MLModelCatalog(data, model_type, backend="pytorch")
    features = model.feature_input_order

    single_sample = data.df.iloc[22][features].values.reshape((1, -1))
    samples = data.df.iloc[0:22][features].values

    def check(predict_fn, inputs, expected_shape):
        # numpy in -> numpy out
        from_numpy = predict_fn(inputs)
        assert from_numpy.shape == expected_shape
        assert isinstance(from_numpy, np.ndarray)

        # tensor in -> tensor out
        from_torch = predict_fn(torch.Tensor(inputs))
        assert from_torch.shape == expected_shape
        assert torch.is_tensor(from_torch)

    # Single and bulk non probabilistic predictions
    check(model.predict, single_sample, (1, 1))
    check(model.predict, samples, (22, 1))

    # Single and bulk probabilistic predictions. The bulk numpy result is now
    # type-checked itself; previously the single-sample result was re-checked
    # by mistake, leaving the bulk output type unverified.
    check(model.predict_proba, single_sample, (1, 2))
    check(model.predict_proba, samples, (22, 2))
def test_properties():
    """Check ``backend`` and ``feature_input_order`` of the adult ANN model."""
    catalog = OnlineCatalog("adult")
    mlmodel = MLModelCatalog(catalog, "ann", backend="tensorflow")

    expected_feature_order = [
        "age",
        "fnlwgt",
        "education-num",
        "capital-gain",
        "capital-loss",
        "hours-per-week",
        "workclass_Private",
        "marital-status_Non-Married",
        "occupation_Other",
        "relationship_Non-Husband",
        "race_White",
        "sex_Male",
        "native-country_US",
    ]

    assert mlmodel.backend == "tensorflow"
    assert mlmodel.feature_input_order == expected_feature_order
def test_forest_properties():
    """Check that a sklearn forest model can be constructed for adult."""
    catalog = OnlineCatalog("adult")
    forest = MLModelCatalog(catalog, "forest", backend="sklearn")

    assert forest is not None
def test_predictions_with_pipeline(model_type, data_name):
    """Check prediction shapes when ``use_pipeline`` is enabled.

    Data frames taken directly from ``data.df`` are passed (one row and 22
    rows) to ``predict`` and ``predict_proba`` of a TensorFlow-backend model.
    """
    catalog = OnlineCatalog(data_name)
    mlmodel = MLModelCatalog(catalog, model_type, backend="tensorflow")
    mlmodel.use_pipeline = True

    one_row = catalog.df.iloc[22].to_frame().T
    many_rows = catalog.df.iloc[0:22]

    # (prediction function, input frame, expected output shape), evaluated in
    # order: non probabilistic first, then probabilistic.
    cases = (
        (mlmodel.predict, one_row, (1, 1)),
        (mlmodel.predict, many_rows, (22, 1)),
        (mlmodel.predict_proba, one_row, (1, 2)),
        (mlmodel.predict_proba, many_rows, (22, 2)),
    )
    for predict_fn, frame, expected_shape in cases:
        assert predict_fn(frame).shape == expected_shape
def test_mlmodel(model_type):
    """Check the class hierarchy MLModelCatalog -> MLModel -> ABC."""
    catalog = OnlineCatalog("adult")
    mlmodel = MLModelCatalog(catalog, model_type, backend="tensorflow")

    assert issubclass(MLModelCatalog, MLModel)
    assert isinstance(mlmodel, MLModel)
    assert issubclass(MLModel, ABC)
def test_get_tree():
    """Check that ``_get_tree_from_booster`` returns a list of strings."""
    catalog = OnlineCatalog("adult")
    forest = MLModelCatalog(catalog, "forest", "xgboost")

    first_booster = forest.tree_iterator[0]
    tree_lines = _get_tree_from_booster(first_booster)

    assert isinstance(tree_lines, list)
    assert isinstance(tree_lines[0], str)
def test_autoencoder():
    """Train autoencoders with several layer layouts and check output shape.

    Three configurations are trained for five epochs without saving: a
    four-entry layout, a two-entry layout, and a six-entry layout with a
    custom loss. Each fitted model must map a (1, 13) input to a (1, 13)
    output.
    """

    def custom_loss(y_true, y_pred):
        return K.max(y_true - y_pred)

    # Build data and mlmodel
    data_name = "adult"
    data = OnlineCatalog(data_name)
    model = MLModelCatalog(data, "ann", backend="tensorflow")

    test_input = tf.Variable(np.zeros((1, 13)), dtype=tf.float32)
    n_features = len(model.feature_input_order)

    # (layer sizes, extra Autoencoder keyword arguments)
    configurations = (
        ([n_features, 20, 10, 5], {}),
        # test with different lengths
        ([n_features, 5], {}),
        # test with different loss function
        ([n_features, 20, 15, 10, 8, 5], {"loss": custom_loss}),
    )

    for layers, extra_kwargs in configurations:
        ae = Autoencoder(data_name, layers, **extra_kwargs)
        fitted_ae = train_autoencoder(
            ae,
            data,
            model.feature_input_order,
            epochs=5,
            save=False,
        )
        assert fitted_ae(test_input).shape == (1, 13)
def test_cfmodel():
    """Check the class hierarchy Dice -> RecourseMethod -> ABC."""
    catalog = OnlineCatalog("adult")
    dice_params = {"num": 1, "desired_class": 1}
    mlmodel = MLModelCatalog(catalog, "ann", backend="tensorflow")

    recourse = Dice(mlmodel, dice_params)

    assert issubclass(Dice, RecourseMethod)
    assert isinstance(recourse, RecourseMethod)
    assert issubclass(RecourseMethod, ABC)
def test_predictions_tf(model_type, data_name):
    """Check prediction shapes of a TensorFlow-backend model on numpy input.

    One row (index 22) and a batch of 22 rows, restricted to the model's
    ``feature_input_order`` columns, are passed to ``predict`` and
    ``predict_proba``.
    """
    catalog = OnlineCatalog(data_name)
    mlmodel = MLModelCatalog(catalog, model_type, backend="tensorflow")
    features = mlmodel.feature_input_order

    one_row = catalog.df.iloc[22][features].values.reshape((1, -1))
    many_rows = catalog.df.iloc[0:22][features].values

    # (prediction function, input array, expected output shape), evaluated in
    # order: non probabilistic first, then probabilistic.
    cases = (
        (mlmodel.predict, one_row, (1, 1)),
        (mlmodel.predict, many_rows, (22, 1)),
        (mlmodel.predict_proba, one_row, (1, 2)),
        (mlmodel.predict_proba, many_rows, (22, 2)),
    )
    for predict_fn, inputs, expected_shape in cases:
        assert predict_fn(inputs).shape == expected_shape
def test_parse_booster():
    """Check that ``parse_booster`` returns five non-empty sequences."""
    catalog = OnlineCatalog("adult")
    forest = MLModelCatalog(catalog, "forest", "xgboost")
    first_tree = forest.tree_iterator[0]

    # Unpacking also pins the number of returned values to exactly five.
    left, right, cut_values, feature_ids, leaf_scores = parse_booster(first_tree)

    for parsed in (left, right, cut_values, feature_ids, leaf_scores):
        assert len(parsed) > 0
def make_benchmark(data_name="adult", model_name="ann"):
    """Build a Benchmark for Dice on the first five negative instances.

    Parameters
    ----------
    data_name :
        Dataset identifier forwarded to ``OnlineCatalog``.
    model_name :
        Model identifier forwarded to ``MLModelCatalog`` (TensorFlow backend).

    Returns
    -------
    The ``Benchmark`` built from the model, a Dice recourse method and the
    selected factuals.
    """
    # get data and mlmodel
    catalog = OnlineCatalog(data_name)
    mlmodel = MLModelCatalog(catalog, model_name, backend="tensorflow")

    # get factuals
    negatives = predict_negative_instances(mlmodel, catalog.df)
    selected = negatives.iloc[:5]

    # get recourse method
    dice = Dice(mlmodel, {"num": 1, "desired_class": 1})

    # make benchmark object
    return Benchmark(mlmodel, dice, selected)
def test_parse_node():
    """Check ``_parse_node`` on one split line and one leaf line of a tree.

    The first line without ``"leaf"`` and the first line containing ``"leaf"``
    are taken from the first booster's tree dump. A split line must yield
    defined threshold/feature/children and no score; a leaf line must yield
    the undefined/leaf sentinels and a score within [0, 1].
    """
    data_name = "adult"
    data = OnlineCatalog(data_name)
    model = MLModelCatalog(data, "forest", "xgboost")

    booster = model.tree_iterator[0]
    tree = _get_tree_from_booster(booster)

    # First leaf line and first non-leaf (split) line, or None when absent.
    leaf_str = next((line for line in tree if "leaf" in line), None)
    node_str = next((line for line in tree if "leaf" not in line), None)

    # Fail with a clear message instead of handing None to _parse_node.
    assert node_str is not None, "tree contains no split node"
    assert leaf_str is not None, "tree contains no leaf node"

    _, threshold, feature, left_child, right_child, score = _parse_node(node_str)
    assert threshold != TREE_UNDEFINED
    assert feature != TREE_UNDEFINED
    assert left_child != TREE_LEAF
    assert right_child != TREE_LEAF
    assert score is None

    _, threshold, feature, left_child, right_child, score = _parse_node(leaf_str)
    assert threshold == TREE_UNDEFINED
    assert feature == TREE_UNDEFINED
    assert left_child == TREE_LEAF
    assert right_child == TREE_LEAF
    assert 0 <= score <= 1
def test_save_and_load():
    """Train and save an autoencoder, reload it, and compare outputs.

    The output of the freshly fitted model and of the model returned by
    ``Autoencoder(data_name).load(...)`` are evaluated in the same session on
    an all-zero input and must be element-wise equal.
    """
    with tf.Session() as sess:
        # Build data and mlmodel
        data_name = "adult"
        data = OnlineCatalog(data_name)
        model = MLModelCatalog(data, "ann", backend="tensorflow")

        test_input = tf.Variable(np.zeros((1, 13)), dtype=tf.float32)

        ae = Autoencoder(data_name, [len(model.feature_input_order), 20, 10, 5])
        fitted_ae = train_autoencoder(
            ae,
            data,
            model.feature_input_order,
            epochs=5,
            save=True,
        )
        expected = fitted_ae(test_input)

        loaded_ae = Autoencoder(data_name).load(len(model.feature_input_order))
        actual = loaded_ae(test_input)

        actual_values = actual.eval(session=sess)
        expected_values = expected.eval(session=sess)
        assert (actual_values == expected_values).all()
def _train_model(data, model_type, backend):
    """Create a catalog model without online loading and force-train it.

    Hyperparameters are read from the module-level ``training_params`` mapping,
    indexed by ``model_type`` and then ``data.name``. Forest models are trained
    with ``max_depth`` and ``n_estimators``; every other model type with
    learning rate, epochs and batch size.

    Returns
    -------
    The trained ``MLModelCatalog`` instance.
    """
    model = MLModelCatalog(data, model_type, load_online=False, backend=backend)
    params = training_params[model_type][data.name]

    if model_type == "forest":
        train_kwargs = {
            "max_depth": params["max_depth"],
            "n_estimators": params["n_estimators"],
        }
    else:
        train_kwargs = {
            "learning_rate": params["lr"],
            "epochs": params["epochs"],
            "batch_size": params["batch_size"],
        }

    model.train(force_train=True, **train_kwargs)
    return model
if rm in torch_methods: backend = "pytorch" for data_name in args.dataset: dataset = OnlineCatalog(data_name) for model_type in args.type: log.info("=====================================") log.info("Recourse method: {}".format(rm)) log.info("Dataset: {}".format(data_name)) log.info("Model type: {}".format(model_type)) if rm in session_models: graph = Graph() with graph.as_default(): ann_sess = Session() with ann_sess.as_default(): mlmodel_sess = MLModelCatalog(dataset, model_type, backend) factuals_sess = predict_negative_instances( mlmodel_sess, dataset) factuals_sess = factuals_sess.iloc[:args. number_of_samples] factuals_sess = factuals_sess.reset_index(drop=True) recourse_method_sess = initialize_recourse_method( rm, mlmodel_sess, dataset, data_name, model_type, setup, sess=ann_sess,