def test_grid_combo_numeric_numeric(self):
    """Two numeric grids are paired with the second grid varying fastest."""
    grids = [[-1, -2], [1, 2, 3]]
    kinds = ["numeric", "numeric"]

    actual = _get_grid_combos(feature_grids=grids, feature_types=kinds)

    expected = np.array(
        [[-1, 1], [-1, 2], [-1, 3], [-2, 1], [-2, 2], [-2, 3]]
    )
    assert_array_equal(actual, expected)
def test_grid_combo_onehot_numeric(self):
    """A one-hot grid contributes indicator columns ahead of the numeric value."""
    grids = [["one", "two"], [1, 2, 3]]
    kinds = ["onehot", "numeric"]

    actual = _get_grid_combos(feature_grids=grids, feature_types=kinds)

    expected = np.array(
        [[1, 0, 1], [1, 0, 2], [1, 0, 3], [0, 1, 1], [0, 1, 2], [0, 1, 3]]
    )
    assert_array_equal(actual, expected)
def test_grid_combo_binary_onehot(self):
    """A binary value is followed by the indicator columns of a one-hot grid."""
    grids = [[0, 1], ["a", "b", "c"]]
    kinds = ["binary", "onehot"]

    actual = _get_grid_combos(feature_grids=grids, feature_types=kinds)

    expected = np.array([
        [0, 1, 0, 0],
        [0, 0, 1, 0],
        [0, 0, 0, 1],
        [1, 1, 0, 0],
        [1, 0, 1, 0],
        [1, 0, 0, 1],
    ])
    assert_array_equal(actual, expected)
def test_grid_combo_numeric_onehot(self):
    """A numeric value is followed by the indicator columns of a one-hot grid."""
    grids = [[-1, -2], ["a", "b", "c"]]
    kinds = ["numeric", "onehot"]

    actual = _get_grid_combos(feature_grids=grids, feature_types=kinds)

    expected = np.array([
        [-1, 1, 0, 0],
        [-1, 0, 1, 0],
        [-1, 0, 0, 1],
        [-2, 1, 0, 0],
        [-2, 0, 1, 0],
        [-2, 0, 0, 1],
    ])
    assert_array_equal(actual, expected)
def test_grid_combo_onehot_onehot(self):
    """Two one-hot grids yield their indicator columns side by side."""
    grids = [["one", "two"], ["a", "b", "c"]]
    kinds = ["onehot", "onehot"]

    actual = _get_grid_combos(feature_grids=grids, feature_types=kinds)

    expected = np.array([
        [1, 0, 1, 0, 0],
        [1, 0, 0, 1, 0],
        [1, 0, 0, 0, 1],
        [0, 1, 1, 0, 0],
        [0, 1, 0, 1, 0],
        [0, 1, 0, 0, 1],
    ])
    assert_array_equal(actual, expected)
def test_grid_combo_numeric_binary(self):
    """A numeric grid and a binary grid are paired value by value."""
    grids = [[-1, -2], [0, 1]]
    kinds = ["numeric", "binary"]

    actual = _get_grid_combos(feature_grids=grids, feature_types=kinds)

    expected = np.array([[-1, 0], [-1, 1], [-2, 0], [-2, 1]])
    assert_array_equal(actual, expected)
def test_grid_combo_onehot_binary(self):
    """One-hot indicator columns are followed by the binary value."""
    grids = [["one", "two"], [0, 1]]
    kinds = ["onehot", "binary"]

    actual = _get_grid_combos(feature_grids=grids, feature_types=kinds)

    expected = np.array([[1, 0, 0], [1, 0, 1], [0, 1, 0], [0, 1, 1]])
    assert_array_equal(actual, expected)
def test_grid_combo_binary_numeric(self):
    """A binary grid and a numeric grid are paired value by value."""
    grids = [[0, 1], [1, 2, 3]]
    kinds = ["binary", "numeric"]

    actual = _get_grid_combos(feature_grids=grids, feature_types=kinds)

    expected = np.array(
        [[0, 1], [0, 2], [0, 3], [1, 1], [1, 2], [1, 3]]
    )
    assert_array_equal(actual, expected)
def test_grid_combo_binary_binary(self):
    """Two binary grids yield all four 0/1 pairs."""
    grids = [[0, 1], [0, 1]]
    kinds = ["binary", "binary"]

    actual = _get_grid_combos(feature_grids=grids, feature_types=kinds)

    expected = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
    assert_array_equal(actual, expected)
def pdp_multi_interact(model, dataset, model_features, features,
                       num_grid_points=None, grid_types=None,
                       percentile_ranges=None, grid_ranges=None,
                       cust_grid_points=None, cust_grid_combos=None,
                       use_custom_grid_combos=False, memory_limit=0.5,
                       n_jobs=1, predict_kwds=None, data_transformer=None):
    """Compute partial-dependence values over combinations of several features.

    Unless custom grid combinations are supplied, ``pdp_isolate`` is run once
    per entry of ``features`` to obtain that feature's grid and type. The
    per-feature grids are then combined into every grid combination, ICE lines
    are computed for each combination in parallel, and the predictions are
    averaged per combination.

    Parameters
    ----------
    model
        Model object; validated by ``_check_model``, which also reports the
        number of classes.
    dataset
        Data set; validated by ``_check_dataset`` and copied before use
        (presumably a pandas DataFrame -- confirm against ``_check_dataset``).
    model_features
        Forwarded unchanged to ``pdp_isolate`` and ``_calc_ice_lines_inter``.
    features
        Sequence of features to interact. Each entry is passed through
        ``_make_list`` when building the flat list of grouping columns.
    num_grid_points, grid_types, percentile_ranges, grid_ranges, cust_grid_points
        Optional per-feature sequences (one entry per item of ``features``).
        When ``None`` they default to ``10``, ``'percentile'``, ``None``,
        ``None`` and ``None`` for every feature respectively.
    cust_grid_combos
        Grid combinations to use verbatim when ``use_custom_grid_combos`` is
        true; ignored otherwise.
    use_custom_grid_combos
        When true, the per-feature ``pdp_isolate`` runs are skipped, so the
        result carries empty ``feature_grids``, ``feature_types`` and
        ``pdp_isolate_outs``.
    memory_limit
        Passed to ``_calc_memory_usage`` to derive the effective job count
        (and to ``_check_memory_limit`` when grids are computed here).
    n_jobs
        Requested number of parallel jobs.
    predict_kwds
        Extra keyword arguments for prediction; defaults to an empty dict.
    data_transformer
        Forwarded unchanged to ``pdp_isolate`` and ``_calc_ice_lines_inter``.

    Returns
    -------
    PDPInteract or list of PDPInteract
        A single ``PDPInteract`` when ``n_classes <= 2``; otherwise a list with
        one ``PDPInteract`` per class.

    Raises
    ------
    ValueError
        If ``use_custom_grid_combos`` is true but ``cust_grid_combos`` is
        ``None``. The validation helpers called here may raise as well.
    """
    # Local import keeps this block self-contained without touching the
    # file-level import section.
    import itertools

    def _expand_default(x, default, length):
        """Return ``x``, or ``[default] * length`` when ``x`` is ``None``."""
        if x is None:
            return [default] * length
        return x

    def _get_grid_combos(feature_grids, feature_types):
        """Return every combination of the per-feature grid values.

        One-hot features are expanded to identity-matrix rows, so each output
        row is the concatenation of one value (or one indicator row) per
        feature. The last feature varies fastest.
        """
        grids = []
        for feature_grid, feature_type in zip(feature_grids, feature_types):
            if feature_type == 'onehot':
                grids.append(np.eye(len(feature_grid)).astype(int).tolist())
            else:
                grids.append(list(feature_grid))

        # np.meshgrid cannot be used here: it treats every input as a 1-D
        # coordinate vector, which would flatten the one-hot indicator rows
        # and break the column layout.
        return np.array([
            [value
             for part in combo
             for value in (part if isinstance(part, list) else [part])]
            for combo in itertools.product(*grids)
        ])

    if predict_kwds is None:
        predict_kwds = {}

    nr_feats = len(features)

    # check function inputs
    n_classes, _predict = _check_model(model=model)
    _check_dataset(df=dataset)
    _dataset = dataset.copy()

    # prepare the grid
    pdp_isolate_outs = []
    if use_custom_grid_combos:
        if cust_grid_combos is None:
            raise ValueError(
                'cust_grid_combos must be provided when '
                'use_custom_grid_combos is True')
        grid_combos = cust_grid_combos
        feature_grids = []
        feature_types = []
    else:
        num_grid_points = _expand_default(
            x=num_grid_points, default=10, length=nr_feats)
        grid_types = _expand_default(
            x=grid_types, default='percentile', length=nr_feats)
        for i in range(nr_feats):
            _check_grid_type(grid_type=grid_types[i])

        percentile_ranges = _expand_default(
            x=percentile_ranges, default=None, length=nr_feats)
        for i in range(nr_feats):
            _check_percentile_range(percentile_range=percentile_ranges[i])

        grid_ranges = _expand_default(
            x=grid_ranges, default=None, length=nr_feats)
        cust_grid_points = _expand_default(
            x=cust_grid_points, default=None, length=nr_feats)

        _check_memory_limit(memory_limit=memory_limit)

        for idx, feature in enumerate(features):
            pdp_isolate_outs.append(pdp_isolate(
                model=model, dataset=_dataset, model_features=model_features,
                feature=feature,
                num_grid_points=num_grid_points[idx],
                grid_type=grid_types[idx],
                percentile_range=percentile_ranges[idx],
                grid_range=grid_ranges[idx],
                cust_grid_points=cust_grid_points[idx],
                memory_limit=memory_limit, n_jobs=n_jobs,
                predict_kwds=predict_kwds,
                data_transformer=data_transformer))

        # For more than two classes each pdp_isolate result is indexed per
        # class; the grid and type are read from the first class entry.
        if n_classes > 2:
            grid_sources = [outs[0] for outs in pdp_isolate_outs]
        else:
            grid_sources = pdp_isolate_outs
        feature_grids = [out.feature_grids for out in grid_sources]
        feature_types = [out.feature_type for out in grid_sources]

        grid_combos = _get_grid_combos(feature_grids, feature_types)

    feature_list = []
    for feature in features:
        feature_list.extend(_make_list(feature))

    # Parallel calculate ICE lines
    true_n_jobs = _calc_memory_usage(
        df=_dataset, total_units=len(grid_combos), n_jobs=n_jobs,
        memory_limit=memory_limit)

    grid_results = Parallel(n_jobs=true_n_jobs)(
        delayed(_calc_ice_lines_inter)(
            grid_combo, data=_dataset, model=model,
            model_features=model_features, n_classes=n_classes,
            feature_list=feature_list, predict_kwds=predict_kwds,
            data_transformer=data_transformer)
        for grid_combo in grid_combos)

    ice_lines = pd.concat(grid_results, axis=0).reset_index(drop=True)
    pdp = ice_lines.groupby(feature_list, as_index=False).mean()

    # combine the final results
    pdp_interact_params = {
        'n_classes': n_classes,
        'features': features,
        'feature_types': feature_types,
        'feature_grids': feature_grids,
    }
    if n_classes > 2:
        pdp_interact_out = []
        for n_class in range(n_classes):
            pred_col = 'class_%d_preds' % n_class
            _pdp = pdp[feature_list + [pred_col]].rename(
                columns={pred_col: 'preds'})
            pdp_interact_out.append(PDPInteract(
                which_class=n_class,
                # Iterating the list itself (rather than range(nr_feats))
                # yields [] in custom-grid mode instead of an IndexError.
                pdp_isolate_outs=[outs[n_class] for outs in pdp_isolate_outs],
                pdp=_pdp, **pdp_interact_params))
    else:
        pdp_interact_out = PDPInteract(
            which_class=None, pdp_isolate_outs=pdp_isolate_outs, pdp=pdp,
            **pdp_interact_params)

    return pdp_interact_out
def test_grid_combo_numeric_onehot(self):
    """Each numeric value is paired with every one-hot indicator row."""
    grids = [[-1, -2], ['a', 'b', 'c']]
    kinds = ['numeric', 'onehot']

    actual = _get_grid_combos(feature_grids=grids, feature_types=kinds)

    expected = np.array([
        [-1, 1, 0, 0],
        [-1, 0, 1, 0],
        [-1, 0, 0, 1],
        [-2, 1, 0, 0],
        [-2, 0, 1, 0],
        [-2, 0, 0, 1],
    ])
    assert_array_equal(actual, expected)
def test_grid_combo_onehot_numeric(self):
    """Each one-hot indicator row is paired with every numeric value."""
    grids = [['one', 'two'], [1, 2, 3]]
    kinds = ['onehot', 'numeric']

    actual = _get_grid_combos(feature_grids=grids, feature_types=kinds)

    expected = np.array([
        [1, 0, 1],
        [1, 0, 2],
        [1, 0, 3],
        [0, 1, 1],
        [0, 1, 2],
        [0, 1, 3],
    ])
    assert_array_equal(actual, expected)
def test_grid_combo_onehot_binary(self):
    """Each one-hot indicator row is paired with both binary values."""
    grids = [['one', 'two'], [0, 1]]
    kinds = ['onehot', 'binary']

    actual = _get_grid_combos(feature_grids=grids, feature_types=kinds)

    expected = np.array([
        [1, 0, 0],
        [1, 0, 1],
        [0, 1, 0],
        [0, 1, 1],
    ])
    assert_array_equal(actual, expected)
def test_grid_combo_binary_onehot(self):
    """Each binary value is paired with every one-hot indicator row."""
    grids = [[0, 1], ['a', 'b', 'c']]
    kinds = ['binary', 'onehot']

    actual = _get_grid_combos(feature_grids=grids, feature_types=kinds)

    expected = np.array([
        [0, 1, 0, 0],
        [0, 0, 1, 0],
        [0, 0, 0, 1],
        [1, 1, 0, 0],
        [1, 0, 1, 0],
        [1, 0, 0, 1],
    ])
    assert_array_equal(actual, expected)