Exemplo n.º 1
0
    def setUpClass(cls):
        """
        Download and set up the ranking test fixtures.

        Fetches the MQ2008 archive into ``cls.dpath``, extracts it, loads the
        Fold1 train/test/validation splits and stores an encrypted DMatrix
        for each of them on the class (``dtrain``, ``dvalid``, ``dtest``).
        The query ids are loaded but not used yet (see the TODO at the end).
        """
        from sklearn.datasets import load_svmlight_files

        # download the test data
        cls.dpath = 'demo/rank/'
        src = 'https://s3-us-west-2.amazonaws.com/xgboost-examples/MQ2008.zip'
        # dpath already ends with a separator; do not insert a second one
        target = cls.dpath + 'MQ2008.zip'
        urllib.request.urlretrieve(url=src, filename=target)

        with zipfile.ZipFile(target, 'r') as f:
            f.extractall(path=cls.dpath)

        fold_dir = cls.dpath + "MQ2008/Fold1/"
        (x_train, y_train, qid_train, x_test, y_test, qid_test, x_valid,
         y_valid, qid_valid) = load_svmlight_files(
             (fold_dir + "train.txt", fold_dir + "test.txt",
              fold_dir + "vali.txt"),
             query_id=True,
             zero_based=False)

        def _encrypted_dmatrix(features, labels):
            # The plaintext and encrypted temp files are shared and
            # overwritten on every call, so matrices are built one at a time.
            dump_svmlight_file(features, labels, temp_name)
            xgb.encrypt_file(temp_name, temp_enc_name, sym_key_file)
            return xgb.DMatrix({username: temp_enc_name})

        # instantiate the matrices
        cls.dtrain = _encrypted_dmatrix(x_train, y_train)
        cls.dvalid = _encrypted_dmatrix(x_valid, y_valid)
        cls.dtest = _encrypted_dmatrix(x_test, y_test)
        #TODO(rishabh): add support for set_group()
        """
def is_correctly_constrained(learner):
    """Return True when ``learner`` passes the monotonicity sweep.

    For each interior level of a 100-point grid on [0, 1], one feature is
    held at that level while the other sweeps the grid.  Predictions with the
    sweep in column 0 must satisfy ``is_increasing`` and predictions with the
    sweep in column 1 must satisfy ``is_decreasing``; the first violation
    makes the function return False.
    """
    num_points = 100
    sweep = np.linspace(0, 1, num_points).reshape((num_points, 1))
    levels = np.linspace(0, 1, num_points)

    def _predict(features, labels, **dmatrix_kwargs):
        # Round-trip through the shared temp files: dump, encrypt, load.
        dump_svmlight_file(features, labels, temp_name)
        xgb.encrypt_file(temp_name, temp_enc_name, sym_key_file)
        matrix = xgb.DMatrix({username: temp_enc_name}, **dmatrix_kwargs)
        return learner.predict(matrix)[0]

    # The two end points of the grid are skipped.
    for level in levels[1:-1]:
        held = level * np.ones((num_points, 1))
        # Labels are required by the svmlight dump; here they are random noise.
        labels = np.random.randn(num_points)

        rising = _predict(np.column_stack((sweep, held)), labels,
                          feature_names=['f0', 'f1'])
        falling = _predict(np.column_stack((held, sweep)), labels)

        if not is_increasing(rising) or not is_decreasing(falling):
            return False

    return True
Exemplo n.º 3
0
    def test_basic(self):
        """Train a two-round binary classifier on the encrypted agaricus data.

        Only training and prediction on the training matrix are live; the
        error-rate checks are kept as a bare string below and do not run.
        """
        dtrain = xgb.DMatrix({username: dpath + 'agaricus.txt.train.enc'})
        dtest = xgb.DMatrix({username: dpath + 'agaricus.txt.test.enc'})
        param = {'max_depth': 2, 'eta': 1, 'objective': 'binary:logistic'}
        # specify validations set to watch performance
        watchlist = [(dtrain, 'train')]
        num_round = 2
        bst = xgb.train(param, dtrain, num_round, watchlist)

        # Keep only the first element of what predict() returns; other call
        # sites in this file unpack the result as (predictions, num_preds).
        preds = bst.predict(dtrain)[0]
        # TODO(rishabh): support for get_label()
        # The string below is disabled code, not documentation.
        """
        labels = dtrain.get_label()
        err = sum(1 for i in range(len(preds))
                  if int(preds[i] > 0.5) != labels[i]) / float(len(preds))
        # error must be smaller than 10%
        assert err < 0.1

        preds = bst.predict(dtest)[0]
        labels = dtest.get_label()
        err = sum(1 for i in range(len(preds))
                  if int(preds[i] > 0.5) != labels[i]) / float(len(preds))
        # error must be smaller than 10%
        assert err < 0.1
        """

        # TODO(rishabh): support for save_binary()
        """
Exemplo n.º 4
0
    def test_basic_rpc(self):
        """Train over the RPC client and check the first ten test predictions.

        Connects to the server at 127.0.0.1:50052, attests without
        verification, trains two rounds on the encrypted agaricus data,
        decrypts the predictions for the test matrix and compares the first
        ten of them, thresholded at 0.5, against hard-coded labels.
        """
        xgb.init_client(
            user_name=username,
            sym_key_file=sym_key_file,
            priv_key_file=priv_key_file,
            cert_file=cert_file,
            remote_addr="127.0.0.1:50052",
        )
        xgb.attest(verify=False)

        train_matrix = xgb.DMatrix({username: dpath + 'agaricus.txt.train.enc'})
        test_matrix = xgb.DMatrix({username: dpath + 'agaricus.txt.test.enc'})

        # Training parameters
        training_params = {
            "tree_method": "hist",
            "n_gpus": "0",
            "objective": "binary:logistic",
            "min_child_weight": "1",
            "gamma": "0.1",
            "max_depth": "5",
            "verbosity": "0",
        }
        model = xgb.train(training_params, train_matrix, 2)

        # Predictions come back encrypted and are decrypted in a second step.
        encrypted, count = model.predict(test_matrix, decrypt=False)
        head = model.decrypt_predictions(encrypted, count)[:10]

        expected = [0, 1, 0, 0, 0, 0, 1, 0, 1, 0]
        mistakes = sum(1 for score, label in zip(head, expected)
                       if int(score > 0.5) != label)
        err = mistakes / float(len(head))

        # error must be smaller than 10%
        assert err < 0.1
Exemplo n.º 5
0
    def test_fast_histmaker(self):
        """Run the 'hist' tree method over a grid of parameter combinations.

        Every combination produced by ``parameter_combinations`` is passed to
        ``run_suite`` and its result to ``assert_results_non_increasing`` with
        a tolerance of 1e-2.  The hist-versus-exact comparison that the second
        half prepares is not visible in this part of the file.
        """
        # Each key maps to the list of values to combine for that parameter.
        variable_param = {'tree_method': ['hist'],
                          'max_depth': [2, 8],
                          'max_bin': [2, 256],
                          'grow_policy': ['depthwise', 'lossguide'],
                          'max_leaves': [64, 0],
                          'verbosity': [0]}
        for param in parameter_combinations(variable_param):
            result = run_suite(param)
            assert_results_non_increasing(result, 1e-2)

        # hist must be same as exact on all-categorial data
        dpath = HOME_DIR + 'demo/data/'
        ag_dtrain = xgb.DMatrix({username: dpath + 'agaricus.txt.train.enc'})
        ag_dtest = xgb.DMatrix({username: dpath + 'agaricus.txt.test.enc'})
        ag_param = {'max_depth': 2,
                    'tree_method': 'hist',
                    'eta': 1,
                    'verbosity': 0,
                    'objective': 'binary:logistic',
                    'eval_metric': 'auc'}
        # Presumably filled through evals_result by the disabled code that
        # follows; nothing visible here writes to them.
        hist_res = {}
        exact_res = {}

        #TODO(rishabh): support for evals_result
        """
Exemplo n.º 6
0
    def run_training_continuation(self, xgb_params_01, xgb_params_02,
                                  xgb_params_03):
        """Exercise training on the digits data as a basis for continuation.

        Builds encrypted matrices for the 2-class and 5-class digits subsets,
        checks that ten boosting rounds yield ten dumped trees, and saves a
        zero-round model to ``HOME_DIR + 'xgb_tc.model'``.  ``xgb_params_02``
        and ``xgb_params_03`` are not used by the part visible here.
        """
        from sklearn.datasets import load_digits
        from sklearn.metrics import mean_squared_error

        # n_class must be passed by keyword: it is keyword-only in current
        # scikit-learn releases, so the positional form raises TypeError.
        digits_2class = load_digits(n_class=2)
        digits_5class = load_digits(n_class=5)

        X_2class = digits_2class['data']
        y_2class = digits_2class['target']

        X_5class = digits_5class['data']
        y_5class = digits_5class['target']

        # The temp files are shared, so each matrix is built before the next
        # dataset is dumped over them.
        dump_svmlight_file(X_2class, y_2class, temp_name)
        xgb.encrypt_file(temp_name, temp_enc_name, sym_key_file)

        dtrain_2class = xgb.DMatrix({username: temp_enc_name})

        dump_svmlight_file(X_5class, y_5class, temp_name)
        xgb.encrypt_file(temp_name, temp_enc_name, sym_key_file)

        dtrain_5class = xgb.DMatrix({username: temp_enc_name})

        gbdt_01 = xgb.train(xgb_params_01, dtrain_2class, num_boost_round=10)
        ntrees_01 = len(gbdt_01.get_dump())
        assert ntrees_01 == 10

        # A zero-round model is saved to disk.
        gbdt_02 = xgb.train(xgb_params_01, dtrain_2class, num_boost_round=0)
        gbdt_02.save_model(HOME_DIR + 'xgb_tc.model')

        #TODO(rishabh): add support for xgb_model
        """
Exemplo n.º 7
0
    def test_dart(self):
        """Train a two-round DART booster on the encrypted agaricus data.

        Only training and one prediction on the test matrix are live.  The
        accuracy check, the save/reload round trip and the custom-metric run
        are kept as bare strings below and do not execute.
        """
        dtrain = xgb.DMatrix({username: dpath + 'agaricus.txt.train.enc'})
        dtest = xgb.DMatrix({username: dpath + 'agaricus.txt.test.enc'})
        param = {
            'max_depth': 5,
            'objective': 'binary:logistic',
            'eval_metric': 'logloss',
            'booster': 'dart',
            'verbosity': 1
        }
        # specify validations set to watch performance
        watchlist = [(dtest, 'eval'), (dtrain, 'train')]
        num_round = 2
        bst = xgb.train(param, dtrain, num_round, watchlist)
        # this is prediction
        preds = bst.predict(dtest, ntree_limit=num_round)[0]
        #TODO(rishabh): implement get_label()
        # Disabled code (string expression, not documentation).
        """
        labels = dtest.get_label()
        err = sum(1 for i in range(len(preds))
                  if int(preds[i] > 0.5) != labels[i]) / float(len(preds))
        # error must be smaller than 10%
        assert err < 0.1
        """

        #TODO(rishabh): implement save_binary()
        # Disabled code (string expression, not documentation).
        """
        # save dmatrix into binary buffer
        dtest.save_binary('dtest.buffer')
        model_path = 'xgb.model.dart'
        # save model
        bst.save_model(model_path)
        # load model and data in
        bst2 = xgb.Booster(params=param, model_file='xgb.model.dart')
        dtest2 = xgb.DMatrix('dtest.buffer')
        preds2 = bst2.predict(dtest2, ntree_limit=num_round)[0]
        # assert they are the same
        assert np.sum(np.abs(preds2 - preds)) == 0
        """
        def my_logloss(preds, dtrain):
            # Currently a stub: it returns None immediately and the string
            # after the return is never evaluated.
            return
            #TODO(rishabh): implement get_label()
            """
            labels = dtrain.get_label()
            return 'logloss', np.sum(
                np.log(np.where(labels, preds, 1 - preds)))
            """

        # check whether custom evaluation metrics work
        #TODO: implement feval (allow definition of a loss function?)
        # Disabled code (string expression, not documentation).
        """
        bst = xgb.train(param, dtrain, num_round, watchlist,
                        feval=my_logloss)
        preds3 = bst.predict(dtest, ntree_limit=num_round)[0]
        assert all(preds3 == preds)
        """

        #TODO(rishabh): implement get_label()
        """
Exemplo n.º 8
0
def run(channel_addr, sym_key_file, priv_key_file, cert_file):
    """Run the two-party demo against the server at ``channel_addr``.

    Initialises the client for ``username`` alongside "user1", attests,
    trains ten rounds on both parties' encrypted training files, requests
    predictions for each party's test file and prints the first ten decrypted
    predictions for this user's file together with the model's feature scores.
    """
    other_party = "user1"
    data_dir = HOME_DIR + "demo/python/multiclient-cluster-remote-control/data/"

    xgb.init_client(
        user_name=username,
        client_list=[other_party, username],
        sym_key_file=sym_key_file,
        priv_key_file=priv_key_file,
        cert_file=cert_file,
        remote_addr=channel_addr,
    )
    xgb.rabit.init()

    # Remote attestation
    print("Remote attestation")
    # Note: Simulation mode does not support attestation
    # pass in `verify=False` to attest()
    xgb.attest()
    print("Report successfully verified")

    print("Load training matrices")
    training_files = {
        other_party: data_dir + "c1_train.enc",
        username: data_dir + "c2_train.enc",
    }
    dtrain = xgb.DMatrix(training_files, encrypted=True)

    print("Creating test matrix")
    their_test = xgb.DMatrix({other_party: data_dir + "c1_test.enc"})
    our_test = xgb.DMatrix({username: data_dir + "c2_test.enc"})

    print("Beginning Training")

    # Training parameters
    params = {
        "tree_method": "hist",
        "n_gpus": "0",
        "objective": "binary:logistic",
        "min_child_weight": "1",
        "gamma": "0.1",
        "max_depth": "3",
        "verbosity": "0",
    }

    # Train and evaluate
    print("Training...")
    booster = xgb.train(params, dtrain, 10)

    # Enable the other party to get its predictions
    _, _ = booster.predict(their_test, decrypt=False)

    # Get our predictions, then decrypt them for display
    encrypted, count = booster.predict(our_test, decrypt=False)
    decrypted = booster.decrypt_predictions(encrypted, count)
    print("Predictions: ", decrypted[:10])

    # Get fscores of model
    print("\nModel Feature Importance: ")
    print(booster.get_fscore())

    xgb.rabit.finalize()
Exemplo n.º 9
0
    def test_multiclass(self):
        """Train two rounds with ``num_class`` set to 2 and predict on the test data.

        The visible part makes no assertions; the checks depend on the
        unsupported calls named in the TODO at the end.
        """
        dtrain = xgb.DMatrix({username: dpath + 'agaricus.txt.train.enc'})
        dtest = xgb.DMatrix({username: dpath + 'agaricus.txt.test.enc'})
        # No 'objective' is given here, only the number of classes.
        param = {'max_depth': 2, 'eta': 1, 'verbosity': 0, 'num_class': 2}
        # specify validations set to watch performance
        watchlist = [(dtest, 'eval'), (dtrain, 'train')]
        num_round = 2
        bst = xgb.train(param, dtrain, num_round, watchlist)
        # this is prediction
        preds = bst.predict(dtest)[0]

        #TODO(rishabh): support for get_label(), save_binary()
        """
Exemplo n.º 10
0
 def test_record_results(self):
     """Prepare matrices, parameters and result holders for a recording test.

     The visible part only sets values up; nothing is trained or asserted
     before the TODO at the end.
     """
     dtrain = xgb.DMatrix({username: dpath + 'agaricus.txt.train.enc'})
     dtest = xgb.DMatrix({username: dpath + 'agaricus.txt.test.enc'})
     param = {
         'max_depth': 2,
         'eta': 1,
         'verbosity': 0,
         'objective': 'binary:logistic'
     }
     # specify validations set to watch performance
     watchlist = [(dtest, 'eval'), (dtrain, 'train')]
     num_round = 2
     # Presumably meant to receive evaluation results once callbacks and
     # evals_result are supported; nothing visible here fills them.
     result = {}
     res2 = {}
     # TODO(rishabh) support for callbacks, evals_result
     """
Exemplo n.º 11
0
def assert_regression_result(results, tol):
    """Check squared-error boosters against scikit-learn's ElasticNet.

    For every entry of ``results`` whose objective is "reg:squarederror",
    the dataset is scaled, an ElasticNet is fitted with the same alpha and
    lambda regularisation, and both the booster's weights (without the first
    entry) and its predictions must be close to the ElasticNet's within
    ``tol`` (used as both relative and absolute tolerance).

    Raises AssertionError on the first mismatch.
    """
    regression_results = [
        r for r in results if r["param"]["objective"] == "reg:squarederror"
    ]
    for res in regression_results:
        # Centering is only requested when X is a dense ndarray.
        X = scale(res["dataset"].X,
                  with_mean=isinstance(res["dataset"].X, np.ndarray))
        y = res["dataset"].y

        dump_svmlight_file(X, y, temp_name)
        xgb.encrypt_file(temp_name, temp_enc_name, sym_key_file)

        reg_alpha = res["param"]["alpha"]
        reg_lambda = res["param"]["lambda"]
        # predict() is unpacked as (predictions, num_preds) elsewhere in this
        # file; take the predictions so they can be compared element-wise
        # and sliced for the failure message.
        pred = res["bst"].predict(xgb.DMatrix({username: temp_enc_name}))[0]
        # The first weight is dropped before comparing with the coefficients.
        weights = xgb_get_weights(res["bst"])[1:]
        enet = ElasticNet(alpha=reg_alpha + reg_lambda,
                          l1_ratio=reg_alpha / (reg_alpha + reg_lambda))
        enet.fit(X, y)
        enet_pred = enet.predict(X)
        assert np.isclose(weights, enet.coef_, rtol=tol,
                          atol=tol).all(), (weights, enet.coef_)
        assert np.isclose(enet_pred, pred, rtol=tol,
                          atol=tol).all(), (res["dataset"].name, enet_pred[:5],
                                            pred[:5])
Exemplo n.º 12
0
    def test_dmatrix_dimensions(self):
        """Check that an encrypted DMatrix reports the dimensions of its data.

        Square random datasets of side 5 and then 2 are dumped, encrypted and
        loaded; row and column counts must match the side length.
        """
        for side in (5, 2):
            features = np.random.randn(side, side)
            labels = np.random.randn(side)

            dump_svmlight_file(features, labels, temp_name)
            xgb.encrypt_file(temp_name, temp_enc_name, sym_key_file)
            matrix = xgb.DMatrix({username: temp_enc_name})

            assert matrix.num_row() == side
            assert matrix.num_col() == side
Exemplo n.º 13
0
    def test_feature_names_slice(self):
        """Check feature-name validation and assignment on an encrypted DMatrix.

        Constructing a DMatrix over 5-column data must raise ValueError for a
        name list of the wrong length, one with duplicates and one containing
        a symbol; assigning five distinct names afterwards must round-trip.
        """
        features = np.random.randn(5, 5)
        labels = np.random.randn(5)
        dump_svmlight_file(features, labels, temp_name)
        xgb.encrypt_file(temp_name, temp_enc_name, sym_key_file)

        rejected_names = (
            list('abcdef'),                  # different length
            ['a', 'b', 'c', 'd', 'd'],       # contains duplicates
            ['a', 'b', 'c', 'd', 'e<1'],     # contains symbol
        )
        for names in rejected_names:
            self.assertRaises(ValueError,
                              xgb.DMatrix, {username: temp_enc_name},
                              feature_names=names)

        valid_names = list('abcde')
        matrix = xgb.DMatrix({username: temp_enc_name})
        matrix.feature_names = valid_names
        assert matrix.feature_names == list('abcde')

        #TODO(rishabh): implement slice()
        """
Exemplo n.º 14
0
 def build_model(self, max_depth, num_round):
     """Train and return a binary-logistic booster on the agaricus data.

     ``max_depth`` is forwarded as the tree depth parameter and ``num_round``
     as the number of boosting rounds.
     """
     training_data = xgb.DMatrix({username: dpath + 'agaricus.txt.train.enc'})
     settings = {
         'max_depth': max_depth,
         'objective': 'binary:logistic',
         'verbosity': 1,
     }
     return xgb.train(settings, training_data, num_round)
Exemplo n.º 15
0
 def test_boost_from_prediction(self):
     """Train one 'hist' round and request margin predictions on the same data.

     The visible part makes no assertions; the rest depends on the
     unsupported call named in the TODO at the end.
     """
     # Re-construct dtrain here to avoid modification
     margined = xgb.DMatrix({username: dpath + 'agaricus.txt.train.enc'})
     bst = xgb.train({'tree_method': 'hist'}, margined, 1)
     # NOTE(review): unlike other call sites in this file the result is not
     # indexed with [0]; confirm the expected shape when this test is enabled.
     predt_0 = bst.predict(margined, output_margin=True)
     #TODO(rishabh): implement set_base_margin()
     """
Exemplo n.º 16
0
    def test_eval_metrics(self):
        """Check that three parameter sets give identical validation predictions.

        The 2-class digits data is split 80/20, both parts are encrypted into
        their own temp files, and boosters trained for ten rounds with
        ``xgb_params_01``/``02``/``03`` must predict the same values on the
        validation matrix.  The early-stopping variants are kept as a bare
        string and do not run.
        """
        try:
            from sklearn.model_selection import train_test_split
        except ImportError:
            # Fallback for very old scikit-learn releases.
            from sklearn.cross_validation import train_test_split
        from sklearn.datasets import load_digits

        # n_class must be passed by keyword: it is keyword-only in current
        # scikit-learn releases, so the positional form raises TypeError.
        digits = load_digits(n_class=2)
        X = digits['data']
        y = digits['target']

        Xt, Xv, yt, yv = train_test_split(X, y, test_size=0.2, random_state=0)

        # Separate temp files are used so both matrices can coexist.
        dump_svmlight_file(Xt, yt, temp_name_t)
        xgb.encrypt_file(temp_name_t, temp_enc_name_t, sym_key_file)
        dump_svmlight_file(Xv, yv, temp_name_v)
        xgb.encrypt_file(temp_name_v, temp_enc_name_v, sym_key_file)

        dtrain = xgb.DMatrix({username: temp_enc_name_t})
        dvalid = xgb.DMatrix({username: temp_enc_name_v})

        # Only referenced by the disabled early-stopping code below.
        watchlist = [(dtrain, 'train'), (dvalid, 'val')]

        gbdt_01 = xgb.train(self.xgb_params_01, dtrain, num_boost_round=10)
        gbdt_02 = xgb.train(self.xgb_params_02, dtrain, num_boost_round=10)
        gbdt_03 = xgb.train(self.xgb_params_03, dtrain, num_boost_round=10)
        assert all(gbdt_01.predict(dvalid)[0] == gbdt_02.predict(dvalid)[0])
        assert all(gbdt_01.predict(dvalid)[0] == gbdt_03.predict(dvalid)[0])

        #TODO(rishabh): implement early_stopping_rounds
        # Disabled code (string expression, not documentation).
        """
        gbdt_01 = xgb.train(self.xgb_params_01, dtrain, 10, watchlist,
                            early_stopping_rounds=2)
        gbdt_02 = xgb.train(self.xgb_params_02, dtrain, 10, watchlist,
                            early_stopping_rounds=2)
        gbdt_03 = xgb.train(self.xgb_params_03, dtrain, 10, watchlist,
                            early_stopping_rounds=2)
        gbdt_04 = xgb.train(self.xgb_params_04, dtrain, 10, watchlist,
                            early_stopping_rounds=2)
        assert gbdt_01.predict(dvalid)[0] == gbdt_02.predict(dvalid)[0]
        assert gbdt_01.predict(dvalid)[0] == gbdt_03.predict(dvalid)[0]
        assert gbdt_03.predict(dvalid)[0] != gbdt_04.predict(dvalid)[0]
        """

        #TODO(rishabh): implement early_stopping_rounds and feval
        """
Exemplo n.º 17
0
    def test_omp(self):
        """Prepare matrices and parameters for a depthwise 'hist' gbtree run.

        The visible part only sets values up; nothing is trained or asserted
        before the TODO at the end.
        """
        dpath = HOME_DIR + 'demo/data/'
        dtrain = xgb.DMatrix({username: dpath + 'agaricus.txt.train.enc'})
        dtest = xgb.DMatrix({username: dpath + 'agaricus.txt.test.enc'})

        param = {'booster': 'gbtree',
                 'objective': 'binary:logistic',
                 'grow_policy': 'depthwise',
                 'tree_method': 'hist',
                 'eval_metric': 'error',
                 'max_depth': 5,
                 'min_child_weight': 0}

        # Evaluation sets in (matrix, name) form.
        watchlist = [(dtest, 'eval'), (dtrain, 'train')]
        num_round = 5

        #TODO(rishabh): implement evals_result in xgb.train()
        """
Exemplo n.º 18
0
        def f(x):
            """Predict with the third feature column set to ``x`` for all 1000 rows.

            ``x1``, ``x2``, ``y`` and ``bst`` come from the enclosing scope.
            """
            third_column = np.repeat(x, 1000)
            features = np.column_stack((x1, x2, third_column))

            # Round-trip through the shared temp files: dump, encrypt, load.
            dump_svmlight_file(features, y, temp_name)
            xgb.encrypt_file(temp_name, temp_enc_name, sym_key_file)
            encrypted_matrix = xgb.DMatrix({username: temp_enc_name})

            outcome = bst.predict(encrypted_matrix)
            return outcome[0]
Exemplo n.º 19
0
    def test_feature_names_validation(self):
        """Check that prediction rejects a matrix with different feature names.

        Two matrices are built from the same encrypted file, one without and
        one with explicit feature names.  A booster trained on either must
        predict on its own matrix, raise ValueError for the other one, and
        still predict on its own matrix afterwards.
        """
        features = np.random.random((10, 3))
        labels = np.random.randint(2, size=(10, ))

        dump_svmlight_file(features, labels, temp_name)
        xgb.encrypt_file(temp_name, temp_enc_name, sym_key_file)

        unnamed = xgb.DMatrix({username: temp_enc_name})
        named = xgb.DMatrix({username: temp_enc_name},
                            feature_names=("a", "b", "c"))

        for own, foreign in ((unnamed, named), (named, unnamed)):
            booster = xgb.train([], own)
            booster.predict(own)  # success
            self.assertRaises(ValueError, booster.predict, foreign)
            booster.predict(own)  # still succeeds after the failed call
Exemplo n.º 20
0
    def test_cv_no_shuffle(self):
        """Prepare the agaricus matrix and parameters for a cross-validation test.

        The visible part only sets values up; nothing runs before the TODO
        at the end.
        """
        dm = xgb.DMatrix({username: dpath + 'agaricus.txt.train.enc'})
        params = {
            'max_depth': 2,
            'eta': 1,
            'verbosity': 0,
            'objective': 'binary:logistic'
        }

        #TODO: implement cv()
        """
def xgb_load_train_predict():
    """Load the client's encrypted data, train ten rounds and predict.

    All parties are expected to have agreed on this code before it is run.

    Returns the pair produced by ``booster.predict`` on the test matrix
    (the predictions and their count).
    """
    client_dir = HOME_DIR + "demo/python/remote-control/client/"

    print("Creating training matrix")
    dtrain = xgb.DMatrix(client_dir + "train.enc", encrypted=True)

    print("Creating test matrix")
    dtest = xgb.DMatrix(client_dir + "test.enc", encrypted=True)

    print("Creating Booster")
    booster = xgb.Booster(cache=(dtrain, dtest))

    print("Beginning Training")

    # Training parameters
    booster.set_param({
        "tree_method": "hist",
        "n_gpus": "0",
        "objective": "binary:logistic",
        "min_child_weight": "1",
        "gamma": "0.1",
        "max_depth": "3",
        "verbosity": "1",
    })
    print("All parameters set")

    # One update per tree, printing the evaluation on both sets each round
    watched = [(dtrain, "train"), (dtest, "test")]
    for round_index in range(10):
        booster.update(dtrain, round_index)
        print(booster.eval_set(list(watched), round_index))

    enc_preds, num_preds = booster.predict(dtest)
    return enc_preds, num_preds
Exemplo n.º 22
0
    def test_feature_names(self):
        """Check that custom feature names flow through training and prediction.

        A matrix built with five explicit names must report them and its
        dimensions; the trained booster's feature scores must be keyed by
        exactly those names; prediction must work on a matrix carrying the
        same names and raise ValueError for one carrying different names.
        """
        features = ['Feature1', 'Feature2', 'Feature3', 'Feature4', 'Feature5']

        def _write_encrypted(values, labels):
            # Overwrites the shared temp files with a fresh dataset.
            dump_svmlight_file(values, labels, temp_name)
            xgb.encrypt_file(temp_name, temp_enc_name, sym_key_file)

        train_values = np.random.randn(100, 5)
        train_labels = np.array([0, 1] * 50)
        _write_encrypted(train_values, train_labels)

        named = xgb.DMatrix({username: temp_enc_name}, feature_names=features)
        assert named.feature_names == features
        assert named.num_row() == 100
        assert named.num_col() == 5

        settings = {
            'objective': 'multi:softprob',
            'eval_metric': 'mlogloss',
            'eta': 0.3,
            'num_class': 3,
        }
        bst = xgb.train(settings, named, num_boost_round=10)
        assert sorted(bst.get_fscore()) == features

        probe_values = np.random.randn(5, 5)
        probe_labels = np.random.randn(5)
        _write_encrypted(probe_values, probe_labels)

        same_names = xgb.DMatrix({username: temp_enc_name},
                                 feature_names=features)
        bst.predict(same_names)[0]

        # different feature name must raises error
        other_names = xgb.DMatrix({username: temp_enc_name},
                                  feature_names=list('abcde'))
        self.assertRaises(ValueError, bst.predict, other_names)
Exemplo n.º 23
0
    def test_cv_early_stopping(self):
        """Prepare the 2-class digits matrix and parameters for a cv test.

        The visible part only builds the encrypted matrix and the parameter
        dict; nothing runs before the TODO at the end.
        """
        from sklearn.datasets import load_digits

        # n_class must be passed by keyword: it is keyword-only in current
        # scikit-learn releases, so the positional form raises TypeError.
        digits = load_digits(n_class=2)
        X = digits['data']
        y = digits['target']
        dump_svmlight_file(X, y, temp_name)
        xgb.encrypt_file(temp_name, temp_enc_name, sym_key_file)
        dm = xgb.DMatrix({username: temp_enc_name})
        params = {'max_depth': 2, 'eta': 1, 'verbosity': 0,
                  'objective': 'binary:logistic'}

        #TODO(rishabh): implement cv()
        """
    def run_interaction_constraints(self, tree_method):
        """Check that a feature outside the interaction constraint acts additively.

        A model is trained with ``tree_method`` on three features where only
        features 0 and 1 are allowed to interact.  The third feature is then
        set to 1, 2 and 3 for every row; each step must change all
        predictions by the same amount (within 1e-4).
        """
        x1 = np.random.normal(loc=1.0, scale=1.0, size=1000)
        x2 = np.random.normal(loc=1.0, scale=1.0, size=1000)
        x3 = np.random.choice([1, 2, 3], size=1000, replace=True)
        noise = np.random.normal(loc=0.001, scale=1.0, size=1000)
        y = x1 + x2 + x3 + x1 * x2 * x3 + noise + 3 * np.sin(x1)

        def _encrypted_matrix(features):
            # Round-trip through the shared temp files: dump, encrypt, load.
            dump_svmlight_file(features, y, temp_name)
            xgb.encrypt_file(temp_name, temp_enc_name, sym_key_file)
            return xgb.DMatrix({username: temp_enc_name})

        dtrain = _encrypted_matrix(np.column_stack((x1, x2, x3)))

        # Fit a model that only allows interaction between x1 and x2
        settings = {
            'max_depth': 3,
            'eta': 0.1,
            'nthread': 2,
            'interaction_constraints': '[[0, 1]]',
            'tree_method': tree_method,
        }
        bst = xgb.train(settings, dtrain, 12, evals=[(dtrain, 'train')])

        # Set all observations to have the same x3 values then increment
        #   by the same amount
        def f(x):
            shifted = np.column_stack((x1, x2, np.repeat(x, 1000)))
            return bst.predict(_encrypted_matrix(shifted))[0]

        preds = []
        for level in [1, 2, 3]:
            preds.append(f(level))

        # Check incrementing x3 has the same effect on all observations
        #   since x3 is constrained to be independent of x1 and x2
        #   and all observations start off from the same x3 value
        for before, after in zip(preds, preds[1:]):
            step = after - before
            assert np.all(np.abs(step - step[0]) < 1e-4)
Exemplo n.º 25
0
    def run_model_pickling(self, xgb_params):
        """Train a booster with ``xgb_params`` and check that its JSON dump is non-empty.

        The pickling round trip itself is not part of the visible code (see
        the TODO at the end).
        """
        X, y = generate_data()

        dump_svmlight_file(X, y, temp_name)
        xgb.encrypt_file(temp_name, temp_enc_name, sym_key_file)

        dtrain = xgb.DMatrix({username: temp_enc_name})
        bst = xgb.train(xgb_params, dtrain)

        dump_0 = bst.get_dump(dump_format='json')
        assert dump_0

        # Presumably the pickle target for the disabled code that follows;
        # unused in the visible part.
        filename = 'model.pkl'

        #TODO: support pickling
        """
Exemplo n.º 26
0
    def test_cv_explicit_fold_indices(self):
        """Prepare a matrix, parameters and explicit fold indices for a cv test.

        The visible part only sets values up; nothing runs before the TODO
        at the end.
        """
        dm = xgb.DMatrix({username: dpath + 'agaricus.txt.train.enc'})
        params = {
            'max_depth': 2,
            'eta': 1,
            'verbosity': 0,
            'objective': 'binary:logistic'
        }
        # Each fold is a (train indices, test indices) pair.
        folds = [
            # Train        Test
            ([1, 3], [5, 8]),
            ([7, 9], [23, 43]),
        ]

        #TODO: implement cv()
        """
Exemplo n.º 27
0
def test_ranking_with_unweighted_data():
    """Build an encrypted 20x4 training matrix for an unweighted ranking test.

    The visible part only prepares the data and the group sizes; nothing is
    trained or asserted before the TODO at the end.
    """
    # Eight ones placed at (row, column) pairs of a 20x4 matrix, densified.
    Xrow = np.array([1, 2, 6, 8, 11, 14, 16, 17])
    Xcol = np.array([0, 0, 1, 1, 2, 2, 3, 3])
    X = csr_matrix((np.ones(shape=8), (Xrow, Xcol)), shape=(20, 4)).toarray()
    y = np.array([
        0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0,
        1.0, 0.0, 1.0, 1.0, 0.0, 0.0
    ])

    dump_svmlight_file(X, y, temp_name)
    xgb.encrypt_file(temp_name, temp_enc_name, sym_key_file)

    # Four entries of 5, summing to the 20 rows; presumably the per-query
    # group sizes for set_group() once it is supported.
    group = np.array([5, 5, 5, 5], dtype=np.uint)
    dtrain = xgb.DMatrix({username: temp_enc_name})
    #TODO(rishabh): implement set_group()
    """
Exemplo n.º 28
0
    def test_cv_early_stopping_with_multiple_eval_sets_and_metrics(self):
        """Prepare breast-cancer data and metric lists for a cv early-stopping test.

        The visible part only sets values up; nothing runs before the TODO
        at the end.
        """
        from sklearn.datasets import load_breast_cancer

        X, y = load_breast_cancer(return_X_y=True)
        dump_svmlight_file(X, y, temp_name)
        xgb.encrypt_file(temp_name, temp_enc_name, sym_key_file)
        dm = xgb.DMatrix({username: temp_enc_name})
        params = {'objective': 'binary:logistic'}

        # Single-metric and two-metric combinations, in both orders.
        metrics = [['auc'], ['error'], ['logloss'], ['logloss', 'auc'],
                   ['logloss', 'error'], ['error', 'logloss']]

        # Unused in the visible part.
        num_iteration_history = []

        # If more than one metrics is given, early stopping should use the last metric
        #TODO(rishabh): implement cv()
        """
Exemplo n.º 29
0
    def test_pruner(self):
        """Grow ten 'exact' trees on the breast-cancer data as input for pruning.

        The visible part trains the booster, records its dump as a string and
        prepares the pruning parameters; the pruning run itself is not part
        of the visible code (see the TODO at the end).
        """
        # Import the submodule explicitly: a bare ``import sklearn`` does not
        # guarantee that ``sklearn.datasets`` is loaded as an attribute.
        import sklearn.datasets
        params = {'tree_method': 'exact'}
        cancer = sklearn.datasets.load_breast_cancer()
        X = cancer['data']
        y = cancer["target"]

        dump_svmlight_file(X, y, temp_name)
        xgb.encrypt_file(temp_name, temp_enc_name, sym_key_file)

        dtrain = xgb.DMatrix({username: temp_enc_name})
        booster = xgb.train(params, dtrain=dtrain, num_boost_round=10)
        # String form of the grown trees, kept for later comparison.
        grown = str(booster.get_dump())

        # Parameters for the update/prune pass over the existing model.
        params = {'updater': 'prune', 'process_type': 'update', 'gamma': '0.2'}
        #TODO(rishabh): add support for xgb_model
        """
Exemplo n.º 30
0
    def test_slice(self):
        """Build an encrypted 100x100 matrix with labels in {0, 1, 2}.

        Training and the base-margin setup are kept as a bare string below
        and do not run; the visible part makes no assertions.
        """
        # rng is a random generator defined outside this method.
        X = rng.randn(100, 100)
        y = rng.randint(low=0, high=3, size=100)
        dump_svmlight_file(X, y, temp_name)
        xgb.encrypt_file(temp_name, temp_enc_name, sym_key_file)
        d = xgb.DMatrix({username: temp_enc_name})
        # Only referenced by the disabled training call below.
        eval_res_0 = {}
        #TODO(rishabh): implement evals_result()
        # Disabled code (string expression, not documentation).
        """
        booster = xgb.train(
            {'num_class': 3, 'objective': 'multi:softprob'}, d,
            num_boost_round=2, evals=[(d, 'd')], evals_result=eval_res_0)

        predt = booster.predict(d)[0]
        predt = predt.reshape(100 * 3, 1)
        d.set_base_margin(predt)
        """

        #TODO(rishabh): implement slice()
        """