Exemplo n.º 1
0
 def test_grid_combo_numeric_numeric(self):
     """Two numeric grids yield every value pair, first feature varying slowest."""
     expected = np.array([
         [-1, 1], [-1, 2], [-1, 3],
         [-2, 1], [-2, 2], [-2, 3],
     ])
     actual = _get_grid_combos(
         feature_grids=[[-1, -2], [1, 2, 3]],
         feature_types=["numeric", "numeric"],
     )
     assert_array_equal(actual, expected)
Exemplo n.º 2
0
 def test_grid_combo_onehot_numeric(self):
     """A one-hot grid expands to indicator columns ahead of the numeric value."""
     onehot_grid = ["one", "two"]
     numeric_grid = [1, 2, 3]
     expected = np.array([
         [1, 0, 1], [1, 0, 2], [1, 0, 3],
         [0, 1, 1], [0, 1, 2], [0, 1, 3],
     ])
     actual = _get_grid_combos(
         feature_grids=[onehot_grid, numeric_grid],
         feature_types=["onehot", "numeric"],
     )
     assert_array_equal(actual, expected)
Exemplo n.º 3
0
 def test_grid_combo_binary_onehot(self):
     """A binary value is followed by the three one-hot indicator columns."""
     expected_rows = [
         [0, 1, 0, 0], [0, 0, 1, 0], [0, 0, 0, 1],
         [1, 1, 0, 0], [1, 0, 1, 0], [1, 0, 0, 1],
     ]
     actual = _get_grid_combos(
         feature_grids=[[0, 1], ["a", "b", "c"]],
         feature_types=["binary", "onehot"],
     )
     assert_array_equal(actual, np.array(expected_rows))
Exemplo n.º 4
0
 def test_grid_combo_numeric_onehot(self):
     """Each numeric value is paired with every one-hot indicator row."""
     expected_rows = [
         [-1, 1, 0, 0], [-1, 0, 1, 0], [-1, 0, 0, 1],
         [-2, 1, 0, 0], [-2, 0, 1, 0], [-2, 0, 0, 1],
     ]
     actual = _get_grid_combos(feature_grids=[[-1, -2], ["a", "b", "c"]],
                               feature_types=["numeric", "onehot"])
     assert_array_equal(actual, np.array(expected_rows))
Exemplo n.º 5
0
 def test_grid_combo_onehot_onehot(self):
     """Two one-hot grids give concatenated indicator rows (2 + 3 columns)."""
     first_grid = ["one", "two"]
     second_grid = ["a", "b", "c"]
     expected = np.array([
         [1, 0, 1, 0, 0], [1, 0, 0, 1, 0], [1, 0, 0, 0, 1],
         [0, 1, 1, 0, 0], [0, 1, 0, 1, 0], [0, 1, 0, 0, 1],
     ])
     actual = _get_grid_combos(feature_grids=[first_grid, second_grid],
                               feature_types=["onehot", "onehot"])
     assert_array_equal(actual, expected)
Exemplo n.º 6
0
 def test_grid_combo_numeric_binary(self):
     """Numeric values are paired with both binary levels, numeric varying slowest."""
     expected = np.array([[-1, 0], [-1, 1], [-2, 0], [-2, 1]])
     actual = _get_grid_combos(
         feature_grids=[[-1, -2], [0, 1]],
         feature_types=["numeric", "binary"],
     )
     assert_array_equal(actual, expected)
Exemplo n.º 7
0
 def test_grid_combo_onehot_binary(self):
     """One-hot indicator columns come first, followed by the binary value."""
     expected = np.array([
         [1, 0, 0],
         [1, 0, 1],
         [0, 1, 0],
         [0, 1, 1],
     ])
     actual = _get_grid_combos(
         feature_grids=[["one", "two"], [0, 1]],
         feature_types=["onehot", "binary"],
     )
     assert_array_equal(actual, expected)
Exemplo n.º 8
0
 def test_grid_combo_binary_numeric(self):
     """Each binary level is paired with every numeric grid value."""
     expected = np.array([
         [0, 1], [0, 2], [0, 3],
         [1, 1], [1, 2], [1, 3],
     ])
     actual = _get_grid_combos(
         feature_grids=[[0, 1], [1, 2, 3]],
         feature_types=["binary", "numeric"],
     )
     assert_array_equal(actual, expected)
Exemplo n.º 9
0
 def test_grid_combo_binary_binary(self):
     """Two binary grids produce the four 0/1 pairs in row-major order."""
     binary_grid = [0, 1]
     expected = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
     actual = _get_grid_combos(
         feature_grids=[binary_grid, binary_grid],
         feature_types=["binary", "binary"],
     )
     assert_array_equal(actual, expected)
Exemplo n.º 10
0
def pdp_multi_interact(model,
                       dataset,
                       model_features,
                       features,
                       num_grid_points=None,
                       grid_types=None,
                       percentile_ranges=None,
                       grid_ranges=None,
                       cust_grid_points=None,
                       cust_grid_combos=None,
                       use_custom_grid_combos=False,
                       memory_limit=0.5,
                       n_jobs=1,
                       predict_kwds=None,
                       data_transformer=None):
    """Compute partial-dependence values over combinations of several features.

    Unless custom combinations are supplied, ``pdp_isolate`` is run once per
    entry of ``features`` to obtain that feature's grid and type; the grids
    are then combined into every possible combination, ICE lines are computed
    for each combination in parallel, and the lines are averaged per
    combination of feature values.

    Parameters
    ----------
    model
        Fitted model; validated by ``_check_model``, which also reports the
        number of classes.
    dataset
        Data validated by ``_check_dataset``. A copy is taken and the copy is
        what gets passed on to the helpers.
        (presumably a pandas DataFrame -- confirm against ``_check_dataset``)
    model_features
        Forwarded unchanged to ``pdp_isolate`` and ``_calc_ice_lines_inter``.
    features
        Sequence with one entry per interacting feature. Each entry is passed
        through ``_make_list``, so an entry may itself stand for several
        columns (e.g. the columns of a one-hot feature).
    num_grid_points, grid_types, percentile_ranges, grid_ranges, cust_grid_points
        Per-feature sequences forwarded element-wise to ``pdp_isolate``. When
        ``None`` they default to ``10``, ``'percentile'``, ``None``, ``None``
        and ``None`` respectively for every feature. Ignored when
        ``use_custom_grid_combos`` is true.
    cust_grid_combos
        Combinations to evaluate as-is when ``use_custom_grid_combos`` is
        true; each item is handed to ``_calc_ice_lines_inter``.
    use_custom_grid_combos
        If true, skip the per-feature ``pdp_isolate`` runs and use
        ``cust_grid_combos``. The resulting objects then carry empty
        ``feature_grids``, ``feature_types`` and ``pdp_isolate_outs``.
    memory_limit, n_jobs
        Passed to ``_calc_memory_usage`` to derive the number of parallel
        jobs actually used (and forwarded to ``pdp_isolate``).
    predict_kwds
        Extra keyword arguments for prediction; ``None`` means no extras.
    data_transformer
        Forwarded unchanged to ``pdp_isolate`` and ``_calc_ice_lines_inter``.

    Returns
    -------
    PDPInteract or list of PDPInteract
        A single object when the model has at most two classes, otherwise a
        list with one object per class.

    Raises
    ------
    ValueError
        If ``use_custom_grid_combos`` is true but ``cust_grid_combos`` is
        ``None``.
    """

    def _expand_default(x, default, length):
        # A missing per-feature option becomes the same default for each feature.
        if x is None:
            return [default] * length
        return x

    def _get_grid_combos(feature_grids, feature_types):
        # Represent every grid point as a row (list) of column values: a
        # one-hot feature contributes one identity-matrix row per category,
        # any other feature contributes a single-value row.
        grids = []
        for feature_grid, feature_type in zip(feature_grids, feature_types):
            if feature_type == 'onehot':
                grids.append(np.eye(len(feature_grid)).astype(int).tolist())
            else:
                grids.append([[value] for value in feature_grid])

        # Cartesian product with the first feature varying slowest. This is
        # built by hand because np.meshgrid flattens every input to 1-D, which
        # would split one-hot rows into unrelated 0/1 scalars. Row order does
        # not influence the final result: the ICE lines are aggregated per
        # combination of feature values further down.
        combos = [[]]
        for grid in grids:
            combos = [prefix + row for prefix in combos for row in grid]
        return np.array(combos)

    if predict_kwds is None:
        predict_kwds = dict()

    nr_feats = len(features)

    # check function inputs
    n_classes, _ = _check_model(model=model)
    _check_dataset(df=dataset)
    _dataset = dataset.copy()

    # prepare the grid
    pdp_isolate_outs = []
    if use_custom_grid_combos:
        if cust_grid_combos is None:
            raise ValueError(
                'cust_grid_combos must be provided when '
                'use_custom_grid_combos is True')
        grid_combos = cust_grid_combos
        feature_grids = []
        feature_types = []
    else:
        num_grid_points = _expand_default(x=num_grid_points,
                                          default=10,
                                          length=nr_feats)
        grid_types = _expand_default(x=grid_types,
                                     default='percentile',
                                     length=nr_feats)
        for i in range(nr_feats):
            _check_grid_type(grid_type=grid_types[i])

        percentile_ranges = _expand_default(x=percentile_ranges,
                                            default=None,
                                            length=nr_feats)
        for i in range(nr_feats):
            _check_percentile_range(percentile_range=percentile_ranges[i])

        grid_ranges = _expand_default(x=grid_ranges,
                                      default=None,
                                      length=nr_feats)
        cust_grid_points = _expand_default(x=cust_grid_points,
                                           default=None,
                                           length=nr_feats)

        _check_memory_limit(memory_limit=memory_limit)

        for idx in range(nr_feats):
            pdp_isolate_out = pdp_isolate(
                model=model,
                dataset=_dataset,
                model_features=model_features,
                feature=features[idx],
                num_grid_points=num_grid_points[idx],
                grid_type=grid_types[idx],
                percentile_range=percentile_ranges[idx],
                grid_range=grid_ranges[idx],
                cust_grid_points=cust_grid_points[idx],
                memory_limit=memory_limit,
                n_jobs=n_jobs,
                predict_kwds=predict_kwds,
                data_transformer=data_transformer)
            pdp_isolate_outs.append(pdp_isolate_out)

        # For multiclass models pdp_isolate yields one result per class; the
        # grid and type are read from the first class's result.
        if n_classes > 2:
            grid_sources = [outs[0] for outs in pdp_isolate_outs]
        else:
            grid_sources = pdp_isolate_outs
        feature_grids = [source.feature_grids for source in grid_sources]
        feature_types = [source.feature_type for source in grid_sources]

        grid_combos = _get_grid_combos(feature_grids, feature_types)

    feature_list = []
    for feature in features:
        feature_list.extend(_make_list(feature))

    # Parallel calculate ICE lines
    true_n_jobs = _calc_memory_usage(df=_dataset,
                                     total_units=len(grid_combos),
                                     n_jobs=n_jobs,
                                     memory_limit=memory_limit)

    grid_results = Parallel(n_jobs=true_n_jobs)(
        delayed(_calc_ice_lines_inter)(grid_combo,
                                       data=_dataset,
                                       model=model,
                                       model_features=model_features,
                                       n_classes=n_classes,
                                       feature_list=feature_list,
                                       predict_kwds=predict_kwds,
                                       data_transformer=data_transformer)
        for grid_combo in grid_combos)

    ice_lines = pd.concat(grid_results, axis=0).reset_index(drop=True)
    pdp = ice_lines.groupby(feature_list, as_index=False).mean()

    # combine the final results
    pdp_interact_params = {
        'n_classes': n_classes,
        'features': features,
        'feature_types': feature_types,
        'feature_grids': feature_grids
    }
    if n_classes > 2:
        pdp_interact_out = []
        for n_class in range(n_classes):
            pred_column = 'class_%d_preds' % n_class
            _pdp = pdp[feature_list + [pred_column]].rename(
                columns={pred_column: 'preds'})
            pdp_interact_out.append(
                PDPInteract(which_class=n_class,
                            # Iterate over the collected results rather than
                            # range(nr_feats): with custom grid combos there
                            # are no per-feature results to index into.
                            pdp_isolate_outs=[
                                outs[n_class] for outs in pdp_isolate_outs
                            ],
                            pdp=_pdp,
                            **pdp_interact_params))
    else:
        pdp_interact_out = PDPInteract(which_class=None,
                                       pdp_isolate_outs=pdp_isolate_outs,
                                       pdp=pdp,
                                       **pdp_interact_params)

    return pdp_interact_out
Exemplo n.º 11
0
 def test_grid_combo_numeric_onehot(self):
     """Each numeric value is combined with every one-hot indicator row."""
     expected = np.array([
         [-1, 1, 0, 0], [-1, 0, 1, 0], [-1, 0, 0, 1],
         [-2, 1, 0, 0], [-2, 0, 1, 0], [-2, 0, 0, 1],
     ])
     actual = _get_grid_combos(
         feature_grids=[[-1, -2], ['a', 'b', 'c']],
         feature_types=['numeric', 'onehot'],
     )
     assert_array_equal(actual, expected)
Exemplo n.º 12
0
 def test_grid_combo_onehot_numeric(self):
     """One-hot indicator columns precede the numeric grid value in each row."""
     expected = np.array([
         [1, 0, 1], [1, 0, 2], [1, 0, 3],
         [0, 1, 1], [0, 1, 2], [0, 1, 3],
     ])
     actual = _get_grid_combos(
         feature_grids=[['one', 'two'], [1, 2, 3]],
         feature_types=['onehot', 'numeric'],
     )
     assert_array_equal(actual, expected)
Exemplo n.º 13
0
 def test_grid_combo_onehot_binary(self):
     """Two one-hot categories crossed with a binary grid give four rows."""
     expected = np.array([
         [1, 0, 0],
         [1, 0, 1],
         [0, 1, 0],
         [0, 1, 1],
     ])
     actual = _get_grid_combos(
         feature_grids=[['one', 'two'], [0, 1]],
         feature_types=['onehot', 'binary'],
     )
     assert_array_equal(actual, expected)
Exemplo n.º 14
0
 def test_grid_combo_binary_onehot(self):
     """Each binary level is followed by every one-hot indicator row."""
     expected = np.array([
         [0, 1, 0, 0], [0, 0, 1, 0], [0, 0, 0, 1],
         [1, 1, 0, 0], [1, 0, 1, 0], [1, 0, 0, 1],
     ])
     actual = _get_grid_combos(
         feature_grids=[[0, 1], ['a', 'b', 'c']],
         feature_types=['binary', 'onehot'],
     )
     assert_array_equal(actual, expected)