Example #1
    def run_training_continuation(self, xgb_params_01, xgb_params_02,
                                  xgb_params_03):
        from sklearn.datasets import load_digits, dump_svmlight_file
        from sklearn.metrics import mean_squared_error

        digits_2class = load_digits(n_class=2)
        digits_5class = load_digits(n_class=5)

        X_2class = digits_2class['data']
        y_2class = digits_2class['target']

        X_5class = digits_5class['data']
        y_5class = digits_5class['target']

        dump_svmlight_file(X_2class, y_2class, temp_name)
        xgb.encrypt_file(temp_name, temp_enc_name, sym_key_file)

        dtrain_2class = xgb.DMatrix({username: temp_enc_name})

        dump_svmlight_file(X_5class, y_5class, temp_name)
        xgb.encrypt_file(temp_name, temp_enc_name, sym_key_file)

        dtrain_5class = xgb.DMatrix({username: temp_enc_name})

        gbdt_01 = xgb.train(xgb_params_01, dtrain_2class, num_boost_round=10)
        ntrees_01 = len(gbdt_01.get_dump())
        assert ntrees_01 == 10

        gbdt_02 = xgb.train(xgb_params_01, dtrain_2class, num_boost_round=0)
        gbdt_02.save_model(HOME_DIR + 'xgb_tc.model')

        #TODO(rishabh): add support for xgb_model
Example #2
    def test_basic(self):
        dtrain = xgb.DMatrix({username: dpath + 'agaricus.txt.train.enc'})
        dtest = xgb.DMatrix({username: dpath + 'agaricus.txt.test.enc'})
        param = {'max_depth': 2, 'eta': 1, 'objective': 'binary:logistic'}
        # specify validation set to watch performance
        watchlist = [(dtrain, 'train')]
        num_round = 2
        bst = xgb.train(param, dtrain, num_round, watchlist)

        preds = bst.predict(dtrain)[0]
        # TODO(rishabh): support for get_label()
        """
        labels = dtrain.get_label()
        err = sum(1 for i in range(len(preds))
                  if int(preds[i] > 0.5) != labels[i]) / float(len(preds))
        # error must be smaller than 10%
        assert err < 0.1

        preds = bst.predict(dtest)[0]
        labels = dtest.get_label()
        err = sum(1 for i in range(len(preds))
                  if int(preds[i] > 0.5) != labels[i]) / float(len(preds))
        # error must be smaller than 10%
        assert err < 0.1
        """

        # TODO(rishabh): support for save_binary()
        """
    def test_boost_from_prediction(self):
        # Re-construct dtrain here to avoid modification
        margined = xgb.DMatrix({username: dpath + 'agaricus.txt.train.enc'})
        bst = xgb.train({'tree_method': 'hist'}, margined, 1)
        predt_0 = bst.predict(margined, output_margin=True)
        #TODO(rishabh): implement set_base_margin()
     """
Example #4
    def test_basic_rpc(self):
        channel_addr = "127.0.0.1:50052"
        xgb.init_client(user_name=username, sym_key_file=sym_key_file,
                        priv_key_file=priv_key_file, cert_file=cert_file,
                        remote_addr=channel_addr)
        xgb.attest(verify=False)

        dtrain = xgb.DMatrix({username: dpath + 'agaricus.txt.train.enc'})
        dtest = xgb.DMatrix({username: dpath + 'agaricus.txt.test.enc'})

        # Set training parameters
        params = {
            "tree_method": "hist",
            "n_gpus": "0",
            "objective": "binary:logistic",
            "min_child_weight": "1",
            "gamma": "0.1",
            "max_depth": "5",
            "verbosity": "0"
        }

        num_rounds = 2
        booster = xgb.train(params, dtrain, num_rounds)

        predictions, num_preds = booster.predict(dtest, decrypt=False)

        preds = booster.decrypt_predictions(predictions, num_preds)
        ten_preds = preds[:10]
        
        labels = [0, 1, 0, 0, 0, 0, 1, 0, 1, 0]
        err = sum(1 for i in range(len(ten_preds))
                  if int(ten_preds[i] > 0.5) != labels[i]) / float(len(ten_preds))

        # error must be smaller than 10%
        assert err < 0.1
Example #5
    def build_model(self, max_depth, num_round):
        dtrain = xgb.DMatrix({username: dpath + 'agaricus.txt.train.enc'})
        param = {'max_depth': max_depth, 'objective': 'binary:logistic',
                 'verbosity': 1}
        bst = xgb.train(param, dtrain, num_round)
        return bst
    def test_dart(self):
        dtrain = xgb.DMatrix({username: dpath + 'agaricus.txt.train.enc'})
        dtest = xgb.DMatrix({username: dpath + 'agaricus.txt.test.enc'})
        param = {
            'max_depth': 5,
            'objective': 'binary:logistic',
            'eval_metric': 'logloss',
            'booster': 'dart',
            'verbosity': 1
        }
        # specify validation set to watch performance
        watchlist = [(dtest, 'eval'), (dtrain, 'train')]
        num_round = 2
        bst = xgb.train(param, dtrain, num_round, watchlist)
        # this is prediction
        preds = bst.predict(dtest, ntree_limit=num_round)[0]
        #TODO(rishabh): implement get_label()
        """
        labels = dtest.get_label()
        err = sum(1 for i in range(len(preds))
                  if int(preds[i] > 0.5) != labels[i]) / float(len(preds))
        # error must be smaller than 10%
        assert err < 0.1
        """

        #TODO(rishabh): implement save_binary()
        """
        # save dmatrix into binary buffer
        dtest.save_binary('dtest.buffer')
        model_path = 'xgb.model.dart'
        # save model
        bst.save_model(model_path)
        # load model and data in
        bst2 = xgb.Booster(params=param, model_file='xgb.model.dart')
        dtest2 = xgb.DMatrix('dtest.buffer')
        preds2 = bst2.predict(dtest2, ntree_limit=num_round)[0]
        # assert they are the same
        assert np.sum(np.abs(preds2 - preds)) == 0
        """
        def my_logloss(preds, dtrain):
            return
            #TODO(rishabh): implement get_label()
            """
            labels = dtrain.get_label()
            return 'logloss', np.sum(
                np.log(np.where(labels, preds, 1 - preds)))
            """

        # check whether custom evaluation metrics work
        #TODO: implement feval (allow definition of a loss function?)
        """
        bst = xgb.train(param, dtrain, num_round, watchlist,
                        feval=my_logloss)
        preds3 = bst.predict(dtest, ntree_limit=num_round)[0]
        assert all(preds3 == preds)
        """

        #TODO(rishabh): implement get_label()
    def test_eval_metrics(self):
        try:
            from sklearn.model_selection import train_test_split
        except ImportError:
            from sklearn.cross_validation import train_test_split
        from sklearn.datasets import load_digits

        digits = load_digits(2)
        X = digits['data']
        y = digits['target']

        Xt, Xv, yt, yv = train_test_split(X, y, test_size=0.2, random_state=0)

        dump_svmlight_file(Xt, yt, temp_name_t) 
        xgb.encrypt_file(temp_name_t, temp_enc_name_t, sym_key_file)
        dump_svmlight_file(Xv, yv, temp_name_v) 
        xgb.encrypt_file(temp_name_v, temp_enc_name_v, sym_key_file)
 
        dtrain = xgb.DMatrix({username: temp_enc_name_t})
        dvalid = xgb.DMatrix({username: temp_enc_name_v})

        watchlist = [(dtrain, 'train'), (dvalid, 'val')]

        gbdt_01 = xgb.train(self.xgb_params_01, dtrain, num_boost_round=10)
        gbdt_02 = xgb.train(self.xgb_params_02, dtrain, num_boost_round=10)
        gbdt_03 = xgb.train(self.xgb_params_03, dtrain, num_boost_round=10)
        assert all(gbdt_01.predict(dvalid)[0] == gbdt_02.predict(dvalid)[0])
        assert all(gbdt_01.predict(dvalid)[0] == gbdt_03.predict(dvalid)[0])

        #TODO(rishabh): implement early_stopping_rounds
        """
        gbdt_01 = xgb.train(self.xgb_params_01, dtrain, 10, watchlist,
                            early_stopping_rounds=2)
        gbdt_02 = xgb.train(self.xgb_params_02, dtrain, 10, watchlist,
                            early_stopping_rounds=2)
        gbdt_03 = xgb.train(self.xgb_params_03, dtrain, 10, watchlist,
                            early_stopping_rounds=2)
        gbdt_04 = xgb.train(self.xgb_params_04, dtrain, 10, watchlist,
                            early_stopping_rounds=2)
        assert gbdt_01.predict(dvalid)[0] == gbdt_02.predict(dvalid)[0]
        assert gbdt_01.predict(dvalid)[0] == gbdt_03.predict(dvalid)[0]
        assert gbdt_03.predict(dvalid)[0] != gbdt_04.predict(dvalid)[0]
        """

        #TODO(rishabh): implement early_stopping_rounds and feval
        """
    def test_monotone_constraints_for_exact_tree_method(self):
        # first check monotonicity for the 'exact' tree method
        params_for_constrained_exact_method = {
            'tree_method': 'exact',
            'verbosity': 1,
            'monotone_constraints': '(1, -1)'
        }
        constrained_exact_method = xgb.train(
            params_for_constrained_exact_method, training_dset)
        assert is_correctly_constrained(constrained_exact_method)
        """
    def test_feature_names_validation(self):
        X = np.random.random((10, 3))
        y = np.random.randint(2, size=(10, ))

        dump_svmlight_file(X, y, temp_name)
        xgb.encrypt_file(temp_name, temp_enc_name, sym_key_file)

        dm1 = xgb.DMatrix({username: temp_enc_name})
        dm2 = xgb.DMatrix({username: temp_enc_name},
                          feature_names=("a", "b", "c"))

        bst = xgb.train([], dm1)
        bst.predict(dm1)  # success
        self.assertRaises(ValueError, bst.predict, dm2)
        bst.predict(dm1)  # success

        bst = xgb.train([], dm2)
        bst.predict(dm2)  # success
        self.assertRaises(ValueError, bst.predict, dm1)
        bst.predict(dm2)  # success
    def test_monotone_constraints_for_depthwise_hist_tree_method(self):
        # next check monotonicity for the 'hist' tree method
        params_for_constrained_hist_method = {
            'tree_method': 'hist',
            'verbosity': 1,
            'monotone_constraints': '(1, -1)'
        }
        constrained_hist_method = xgb.train(params_for_constrained_hist_method,
                                            training_dset)

        assert is_correctly_constrained(constrained_hist_method)
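
Both monotonicity tests lean on `training_dset` and `is_correctly_constrained`, which these snippets never define. A plausible, hedged reconstruction of the checker, written against plain vanilla-XGBoost matrices rather than the encrypted ones used above:

# Hypothetical helper: sweep one feature while holding the other fixed and
# verify the predictions respect the '(1, -1)' constraint.
import numpy as np

def is_increasing(y):
    return np.count_nonzero(np.diff(y) < 0.0) == 0

def is_decreasing(y):
    return np.count_nonzero(np.diff(y) > 0.0) == 0

def is_correctly_constrained(learner, n=100):
    variable = np.linspace(0, 1, n).reshape((n, 1))
    for value in np.linspace(0, 1, 10):
        fixed = np.full((n, 1), value)
        increasing_dset = xgb.DMatrix(np.column_stack((variable, fixed)))
        decreasing_dset = xgb.DMatrix(np.column_stack((fixed, variable)))
        if not is_increasing(learner.predict(increasing_dset)):
            return False
        if not is_decreasing(learner.predict(decreasing_dset)):
            return False
    return True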
Example #11
def run(channel_addr, sym_key_file, priv_key_file, cert_file):
    xgb.init_client(user_name=username, client_list=["user1", username],
                    sym_key_file=sym_key_file, priv_key_file=priv_key_file,
                    cert_file=cert_file, remote_addr=channel_addr)

    xgb.rabit.init()

    # Remote attestation
    print("Remote attestation")

    # Note: Simulation mode does not support attestation
    # pass in `verify=False` to attest()
    xgb.attest()
    print("Report successfully verified")

    print("Load training matrices")
    dtrain = xgb.DMatrix({"user1": HOME_DIR + "demo/python/multiclient-cluster-remote-control/data/c1_train.enc", username: HOME_DIR + "demo/python/multiclient-cluster-remote-control/data/c2_train.enc"}, encrypted=True)

    print("Creating test matrix")
    dtest1 = xgb.DMatrix({"user1": HOME_DIR +
                          "demo/python/multiclient-cluster-remote-control/data/c1_test.enc"})
    dtest2 = xgb.DMatrix({username: HOME_DIR +
                          "demo/python/multiclient-cluster-remote-control/data/c2_test.enc"})

    print("Beginning Training")

    # Set training parameters
    params = {
        "tree_method": "hist",
        "n_gpus": "0",
        "objective": "binary:logistic",
        "min_child_weight": "1",
        "gamma": "0.1",
        "max_depth": "3",
        "verbosity": "0"
    }

    # Train and evaluate
    num_rounds = 10 
    print("Training...")
    booster = xgb.train(params, dtrain, num_rounds)

    # Enable the other party to get its predictions
    _, _ = booster.predict(dtest1, decrypt=False)

    # Get our predictions
    predictions, num_preds = booster.predict(dtest2, decrypt=False)

    # Decrypt predictions
    print("Predictions: ", booster.decrypt_predictions(predictions, num_preds)[:10])

    # Get fscores of model
    print("\nModel Feature Importance: ")
    print(booster.get_fscore())

    xgb.rabit.finalize()
Example #12
    def test_multiclass(self):
        dtrain = xgb.DMatrix({username: dpath + 'agaricus.txt.train.enc'})
        dtest = xgb.DMatrix({username: dpath + 'agaricus.txt.test.enc'})
        param = {'max_depth': 2, 'eta': 1, 'verbosity': 0, 'num_class': 2}
        # specify validation set to watch performance
        watchlist = [(dtest, 'eval'), (dtrain, 'train')]
        num_round = 2
        bst = xgb.train(param, dtrain, num_round, watchlist)
        # this is prediction
        preds = bst.predict(dtest)[0]

        #TODO(rishabh): support for get_label(), save_binary()
Example #13
        def fn(max_depth, num_rounds):
            # train
            params = {'max_depth': max_depth, 'eta': 1, 'verbosity': 0}
            bst = xgb.train(params, dtrain, num_boost_round=num_rounds)

            # predict
            preds = bst.predict(dtest)[0]
            contribs = bst.predict(dtest, pred_contribs=True)[0]

            # result should be (number of rows) x (number of features + BIAS)
            assert contribs.shape == (dtest.num_row(), dtest.num_col() + 1)

            # sum of contributions should be same as predictions
            np.testing.assert_array_almost_equal(np.sum(contribs, axis=1), preds)
    def run_interaction_constraints(self, tree_method):
        x1 = np.random.normal(loc=1.0, scale=1.0, size=1000)
        x2 = np.random.normal(loc=1.0, scale=1.0, size=1000)
        x3 = np.random.choice([1, 2, 3], size=1000, replace=True)
        y = x1 + x2 + x3 + x1 * x2 * x3 \
            + np.random.normal(
                loc=0.001, scale=1.0, size=1000) + 3 * np.sin(x1)
        X = np.column_stack((x1, x2, x3))

        dump_svmlight_file(X, y, temp_name)
        xgb.encrypt_file(temp_name, temp_enc_name, sym_key_file)

        dtrain = xgb.DMatrix({username: temp_enc_name})

        params = {
            'max_depth': 3,
            'eta': 0.1,
            'nthread': 2,
            'interaction_constraints': '[[0, 1]]',
            'tree_method': tree_method
        }
        num_boost_round = 12
        # Fit a model that only allows interaction between x1 and x2
        bst = xgb.train(params,
                        dtrain,
                        num_boost_round,
                        evals=[(dtrain, 'train')])

        # Set all observations to have the same x3 values then increment
        #   by the same amount
        def f(x):
            tX = np.column_stack((x1, x2, np.repeat(x, 1000)))

            dump_svmlight_file(tX, y, temp_name)
            xgb.encrypt_file(temp_name, temp_enc_name, sym_key_file)

            tmat = xgb.DMatrix({username: temp_enc_name})

            return bst.predict(tmat)[0]

        preds = [f(x) for x in [1, 2, 3]]

        # Check incrementing x3 has the same effect on all observations
        #   since x3 is constrained to be independent of x1 and x2
        #   and all observations start off from the same x3 value
        diff1 = preds[1] - preds[0]
        assert np.all(np.abs(diff1 - diff1[0]) < 1e-4)
        diff2 = preds[2] - preds[1]
        assert np.all(np.abs(diff2 - diff2[0]) < 1e-4)
Example #15
    def run_model_pickling(self, xgb_params):
        X, y = generate_data()

        dump_svmlight_file(X, y, temp_name)
        xgb.encrypt_file(temp_name, temp_enc_name, sym_key_file)

        dtrain = xgb.DMatrix({username: temp_enc_name})
        bst = xgb.train(xgb_params, dtrain)

        dump_0 = bst.get_dump(dump_format='json')
        assert dump_0

        filename = 'model.pkl'

        #TODO: support pickling
        """
    def test_glm(self):
        param = {
            'verbosity': 0,
            'objective': 'binary:logistic',
            'booster': 'gblinear',
            'alpha': 0.0001,
            'lambda': 1,
            'nthread': 1
        }
        watchlist = [(dtest, 'eval'), (dtrain, 'train')]
        num_round = 4
        bst = xgb.train(param, dtrain, num_round, watchlist)
        assert isinstance(bst, xgb.core.Booster)
        preds = bst.predict(dtest)[0]
        #TODO(rishabh): implement get_label()
        """
Example #17
    def test_pruner(self):
        import sklearn
        params = {'tree_method': 'exact'}
        cancer = sklearn.datasets.load_breast_cancer()
        X = cancer['data']
        y = cancer["target"]

        dump_svmlight_file(X, y, temp_name) 
        xgb.encrypt_file(temp_name, temp_enc_name, sym_key_file)
 
        dtrain = xgb.DMatrix({username: temp_enc_name})
        booster = xgb.train(params, dtrain=dtrain, num_boost_round=10)
        grown = str(booster.get_dump())

        params = {'updater': 'prune', 'process_type': 'update', 'gamma': '0.2'}
        #TODO(rishabh): add support for xgb_model
    def test_alpha_and_lambda(self):
        params = {
            'tree_method': 'exact',
            'verbosity': 1,
            'objective': 'reg:squarederror',
            'eta': 1,
            'lambda': 1,
            'alpha': 0.1
        }

        model = xgb.train(params, train_data, 1)
        preds = model.predict(train_data)[0]

        # Default prediction (with no trees) is 0.5
        # sum_grad = (0.5 - 1.0)
        # sum_hess = 1.0
        # 0.7 = 0.5 - (sum_grad - alpha * sgn(sum_grad)) / (sum_hess + lambda)
        assert_approx_equal(preds[0], 0.7)
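
        # Hand check of the 0.7 above (hedged; assumes squared error with all
        # labels 1.0 and base_score 0.5, so only the grad/hess ratio matters):
        base_score, alpha, lam = 0.5, 0.1, 1.0
        sum_grad, sum_hess = base_score - 1.0, 1.0  # pred - label; hess of squared error
        leaf = -(sum_grad - alpha * (-1.0)) / (sum_hess + lam)  # sgn(sum_grad) = -1
        assert abs(base_score + leaf - 0.7) < 1e-9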
Example #19
    def test_lambda(self):
        #  train_data = xgb.DMatrix({username: temp_enc_name})
        params = {
            'tree_method': 'exact', 'verbosity': 0,
            'objective': 'reg:squarederror',
            'eta': 1,
            'lambda': 1,
            'alpha': 0
        }

        model = xgb.train(params, train_data, 1)
        preds = model.predict(train_data)[0]
        print(preds)

        # Default prediction (with no trees) is 0.5
        # sum_grad = (0.5 - 1.0)
        # sum_hess = 1.0
        # 0.75 = 0.5 - sum_grad / (sum_hess + lambda)
        assert_approx_equal(preds[0], 0.75)
Example #20
    def test_dump(self):
        data = np.random.randn(100, 2)
        target = np.array([0, 1] * 50)

        dump_svmlight_file(data, target, temp_name)
        xgb.encrypt_file(temp_name, temp_enc_name, sym_key_file)

        features = ['Feature1', 'Feature2']

        dm = xgb.DMatrix({username: temp_enc_name}, feature_names=features)
        params = {
            'objective': 'binary:logistic',
            'eval_metric': 'logloss',
            'eta': 0.3,
            'max_depth': 1
        }

        bst = xgb.train(params, dm, num_boost_round=1)

        # number of feature importances should == number of features
        dump1 = bst.get_dump()
        self.assertEqual(len(dump1), 1, "Expected only 1 tree to be dumped.")
        self.assertEqual(len(dump1[0].splitlines()), 3,
                         "Expected 1 root and 2 leaves - 3 lines in dump.")

        dump2 = bst.get_dump(with_stats=True)
        self.assertEqual(dump2[0].count('\n'), 3,
                         "Expected 1 root and 2 leaves - 3 lines in dump.")
        self.assertGreater(
            dump2[0].find('\n'), dump1[0].find('\n'),
            "Expected more info when with_stats=True is given.")

        dump3 = bst.get_dump(dump_format="json")
        dump3j = json.loads(dump3[0])
        self.assertEqual(dump3j["nodeid"], 0, "Expected the root node on top.")

        dump4 = bst.get_dump(dump_format="json", with_stats=True)
        dump4j = json.loads(dump4[0])
        self.assertIn("gain", dump4j, "Expected 'gain' to be dumped in JSON.")
Example #21
    def test_feature_names(self):
        data = np.random.randn(100, 5)
        target = np.array([0, 1] * 50)

        dump_svmlight_file(data, target, temp_name)
        xgb.encrypt_file(temp_name, temp_enc_name, sym_key_file)

        features = ['Feature1', 'Feature2', 'Feature3', 'Feature4', 'Feature5']

        dm = xgb.DMatrix({username: temp_enc_name}, feature_names=features)
        assert dm.feature_names == features
        assert dm.num_row() == 100
        assert dm.num_col() == 5

        params = {
            'objective': 'multi:softprob',
            'eval_metric': 'mlogloss',
            'eta': 0.3,
            'num_class': 3
        }

        bst = xgb.train(params, dm, num_boost_round=10)
        scores = bst.get_fscore()
        assert sorted(scores) == features

        dummy_X = np.random.randn(5, 5)
        dummy_Y = np.random.randn(5)

        dump_svmlight_file(dummy_X, dummy_Y, temp_name)
        xgb.encrypt_file(temp_name, temp_enc_name, sym_key_file)

        dm = xgb.DMatrix({username: temp_enc_name}, feature_names=features)
        bst.predict(dm)[0]

        # different feature names must raise an error
        dm = xgb.DMatrix({username: temp_enc_name},
                         feature_names=list('abcde'))
        self.assertRaises(ValueError, bst.predict, dm)
Example #22
print("Beginning Training")

# Set training parameters
params = {
    "tree_method": "hist",
    "n_gpus": "0",
    "objective": "binary:logistic",
    "min_child_weight": "1",
    "gamma": "0.1",
    "max_depth": "3",
    "verbosity": "1"
}

# Train and evaluate
num_rounds = 5
booster = xgb.train(params,
                    dtrain,
                    num_rounds,
                    evals=[(dtrain, "train"), (dtest, "test")])
booster.save_model(DIR + "/demo_model.model")

# Get encrypted predictions
print("\n\nModel Predictions: ")
predictions, num_preds = booster.predict(dtest, decrypt=False)

# Decrypt predictions
print(booster.decrypt_predictions(predictions, num_preds)[:20])

xgb.rabit.finalize()
Example #23
    def test_feature_importances(self):
        data = np.random.randn(100, 5)
        target = np.array([0, 1] * 50)

        dump_svmlight_file(data, target, temp_name) 
        xgb.encrypt_file(temp_name, temp_enc_name, sym_key_file)
 
        features = ['Feature1', 'Feature2', 'Feature3', 'Feature4', 'Feature5']

        dm = xgb.DMatrix({username: temp_enc_name}, feature_names=features)
        params = {'objective': 'multi:softprob',
                  'eval_metric': 'mlogloss',
                  'eta': 0.3,
                  'num_class': 3}

        bst = xgb.train(params, dm, num_boost_round=10)

        # number of feature importances should == number of features
        scores1 = bst.get_score()
        scores2 = bst.get_score(importance_type='weight')
        scores3 = bst.get_score(importance_type='cover')
        scores4 = bst.get_score(importance_type='gain')
        scores5 = bst.get_score(importance_type='total_cover')
        scores6 = bst.get_score(importance_type='total_gain')
        assert len(scores1) == len(features)
        assert len(scores2) == len(features)
        assert len(scores3) == len(features)
        assert len(scores4) == len(features)
        assert len(scores5) == len(features)
        assert len(scores6) == len(features)

        # check backwards compatibility of get_fscore
        fscores = bst.get_fscore()
        assert scores1 == fscores

        dtrain = xgb.DMatrix({username: dpath + 'agaricus.txt.train.enc'})
        dtest = xgb.DMatrix({username: dpath + 'agaricus.txt.test.enc'})

        def fn(max_depth, num_rounds):
            # train
            params = {'max_depth': max_depth, 'eta': 1, 'verbosity': 0}
            bst = xgb.train(params, dtrain, num_boost_round=num_rounds)

            # predict
            preds = bst.predict(dtest)[0]
            contribs = bst.predict(dtest, pred_contribs=True)[0]

            # result should be (number of rows) x (number of features + BIAS)
            assert contribs.shape == (dtest.num_row(), dtest.num_col() + 1)

            # sum of contributions should be same as predictions
            np.testing.assert_array_almost_equal(np.sum(contribs, axis=1), preds)

        # for max_depth, num_rounds in itertools.product(range(0, 3), range(1, 5)):
        #     yield fn, max_depth, num_rounds

        # check that we get the right SHAP values for a basic AND example
        # (https://arxiv.org/abs/1706.06060)
        X = np.zeros((4, 2))
        X[0, :] = 1
        X[1, 0] = 1
        X[2, 1] = 1
        y = np.zeros(4)
        y[0] = 1
        param = {"max_depth": 2, "base_score": 0.0, "eta": 1.0, "lambda": 0}

        dump_svmlight_file(X, y, temp_name) 
        xgb.encrypt_file(temp_name, temp_enc_name, sym_key_file)

        bst = xgb.train(param, xgb.DMatrix({username: temp_enc_name}), 1)

        dump_svmlight_file(X[0:1, :], np.zeros(1), temp_name) 
        xgb.encrypt_file(temp_name, temp_enc_name, sym_key_file)

        out = bst.predict(xgb.DMatrix({username: temp_enc_name}), pred_contribs=True)[0]
        #TODO(rishabh): enable pred_contribs
        """
        assert out[0, 0] == 0.375
        assert out[0, 1] == 0.375
        assert out[0, 2] == 0.25
        """

        def parse_model(model):
            trees = []
            r_exp = r"([0-9]+):\[f([0-9]+)<([0-9\.e-]+)\] yes=([0-9]+),no=([0-9]+).*cover=([0-9e\.]+)"
            r_exp_leaf = r"([0-9]+):leaf=([0-9\.e-]+),cover=([0-9e\.]+)"
            for tree in model.get_dump(with_stats=True):
                lines = list(tree.splitlines())
                trees.append([None for i in range(len(lines))])
                for line in lines:
                    match = re.search(r_exp, line)
                    if match is not None:
                        ind = int(match.group(1))
                        while ind >= len(trees[-1]):
                            trees[-1].append(None)
                        trees[-1][ind] = {
                            "yes_ind": int(match.group(4)),
                            "no_ind": int(match.group(5)),
                            "value": None,
                            "threshold": float(match.group(3)),
                            "feature_index": int(match.group(2)),
                            "cover": float(match.group(6))
                        }
                    else:
                        match = re.search(r_exp_leaf, line)
                        ind = int(match.group(1))
                        while ind >= len(trees[-1]):
                            trees[-1].append(None)
                        trees[-1][ind] = {
                            "value": float(match.group(2)),
                            "cover": float(match.group(3))
                        }
            return trees

        def exp_value_rec(tree, z, x, i=0):
            if tree[i]["value"] is not None:
                return tree[i]["value"]
            else:
                ind = tree[i]["feature_index"]
                if z[ind] == 1:
                    if x[ind] < tree[i]["threshold"]:
                        return exp_value_rec(tree, z, x, tree[i]["yes_ind"])
                    else:
                        return exp_value_rec(tree, z, x, tree[i]["no_ind"])
                else:
                    r_yes = tree[tree[i]["yes_ind"]]["cover"] / tree[i]["cover"]
                    out = exp_value_rec(tree, z, x, tree[i]["yes_ind"])
                    val = out * r_yes

                    r_no = tree[tree[i]["no_ind"]]["cover"] / tree[i]["cover"]
                    out = exp_value_rec(tree, z, x, tree[i]["no_ind"])
                    val += out * r_no
                    return val

        def exp_value(trees, z, x):
            return np.sum([exp_value_rec(tree, z, x) for tree in trees])

        def all_subsets(ss):
            return itertools.chain(*map(lambda x: itertools.combinations(ss, x), range(0, len(ss) + 1)))

        def shap_value(trees, x, i, cond=None, cond_value=None):
            M = len(x)
            z = np.zeros(M)
            other_inds = list(set(range(M)) - set([i]))
            if cond is not None:
                other_inds = list(set(other_inds) - set([cond]))
                z[cond] = cond_value
                M -= 1
            total = 0.0

            for subset in all_subsets(other_inds):
                if len(subset) > 0:
                    z[list(subset)] = 1
                v1 = exp_value(trees, z, x)
                z[i] = 1
                v2 = exp_value(trees, z, x)
                total += (v2 - v1) / (scipy.special.binom(M - 1, len(subset)) * M)
                z[i] = 0
                z[list(subset)] = 0
            return total

        def shap_values(trees, x):
            vals = [shap_value(trees, x, i) for i in range(len(x))]
            vals.append(exp_value(trees, np.zeros(len(x)), x))
            return np.array(vals)

        def interaction_values(trees, x):
            M = len(x)
            out = np.zeros((M + 1, M + 1))
            for i in range(len(x)):
                for j in range(len(x)):
                    if i != j:
                        out[i, j] = interaction_value(trees, x, i, j) / 2
            svals = shap_values(trees, x)
            main_effects = svals - out.sum(1)
            out[np.diag_indices_from(out)] = main_effects
            return out

        def interaction_value(trees, x, i, j):
            M = len(x)
            z = np.zeros(M)
            other_inds = list(set(range(M)) - set([i, j]))

            total = 0.0
            for subset in all_subsets(other_inds):
                if len(subset) > 0:
                    z[list(subset)] = 1
                v00 = exp_value(trees, z, x)
                z[i] = 1
                v10 = exp_value(trees, z, x)
                z[j] = 1
                v11 = exp_value(trees, z, x)
                z[i] = 0
                v01 = exp_value(trees, z, x)
                z[j] = 0
                total += (v11 - v01 - v10 + v00) / (scipy.special.binom(M - 2, len(subset)) * (M - 1))
                z[list(subset)] = 0
            return total

        # test a simple and function
        M = 2
        N = 4
        X = np.zeros((N, M))
        X[0, :] = 1
        X[1, 0] = 1
        X[2, 1] = 1
        y = np.zeros(N)
        y[0] = 1
        param = {"max_depth": 2, "base_score": 0.0, "eta": 1.0, "lambda": 0}

        #TODO(rishabh): enable pred_contribs
Example #24
def run(channel_addr, sym_key_file, priv_key_file, cert_file):
    # Remote attestation
    print("Remote attestation")
    xgb.init_client(user_name=username,
                    sym_key_file=sym_key_file,
                    priv_key_file=priv_key_file,
                    cert_file=cert_file,
                    remote_addr=channel_addr)

    # Note: Simulation mode does not support attestation
    # pass in `verify=False` to attest()
    xgb.attest()
    print("Report successfully verified")

    print("Creating training matrix")
    dtrain = xgb.DMatrix(
        {username: HOME_DIR + "demo/python/remote-control/data/train.enc"})
    if not dtrain:
        print("Error creating dtrain")
        return
    print("dtrain: " + dtrain.handle.value.decode("utf-8"))

    print("Creating test matrix")
    dtest = xgb.DMatrix(
        {username: HOME_DIR + "demo/python/remote-control/data/test.enc"})
    if not dtest:
        print("Error creating dtest")
        return
    print("dtest: " + dtest.handle.value.decode("utf-8"))

    print("Beginning Training")

    # Set training parameters
    params = {
        "tree_method": "hist",
        "n_gpus": "0",
        "objective": "binary:logistic",
        "min_child_weight": "1",
        "gamma": "0.1",
        "max_depth": "3",
        "verbosity": "0"
    }

    # Train and evaluate
    num_rounds = 5
    print("Training...")
    booster = xgb.train(params, dtrain, num_rounds)

    print("booster: " + booster.handle.value.decode("utf-8"))

    booster.save_model(HOME_DIR +
                       "demo/python/remote-control/client/modelfile.model")

    # Get encrypted predictions
    print("\nModel Predictions: ")
    predictions, num_preds = booster.predict(dtest, decrypt=False)

    # Decrypt predictions
    print(booster.decrypt_predictions(predictions, num_preds))

    # Get fscores of model
    print("\nModel Feature Importance: ")
    print(booster.get_fscore())