def transform(self, X):
    """
    Expand each row of X into matrix-valued features.

    A column of ones is prepended to X, and every resulting column is multiplied
    with every d_y-by-d_t matrix stored in ``self._fts``.

    Parameters
    ----------
    X : array-like, shape (n, d_x)
        Input features.

    Returns
    -------
    array, shape (n, d_y * d_t * (d_x + 1), d_y, d_t)
    """
    n_rows = shape(X)[0]
    # prepend the intercept column
    augmented = hstack([np.ones((n_rows, 1)), X])
    n_cols = shape(augmented)[1]
    d_y, d_t = self._d_y, self._d_t
    # outer product of every (row, column) entry with every stored d_y x d_t matrix
    outer = np.einsum('nx,fyt->nfxyt', augmented, self._fts)
    # flatten the (matrix index, column index) pair into a single feature axis
    return reshape(outer, (n_rows, d_y * d_t * n_cols, d_y, d_t))
def _generate_recoverable_errors(a_X, X, a_W=None, W=None, featurizer=None):
    """
    Return error vectors e_t and e_y such that OLS can recover the true coefficients from both stages.

    Parameters
    ----------
    a_X : array-like
        Coefficients multiplied with the cross product of the featurized X and e_t.
    X : array-like, shape (n, d_x)
        Features.
    a_W : array-like, optional
        Coefficients multiplied with the cross product of W and e_t.
        Defaults to zeros of length ``shape(W)[1]``.
    W : array-like, optional
        Controls. Defaults to an empty ``(n, 0)`` array.
    featurizer : transformer with ``fit_transform``, optional
        Applied to X to obtain ϕ(X). When None, a new ``FunctionTransformer()`` is
        created for this call, so no fitted estimator is shared between calls
        (the previous default instance was built once at definition time and
        refit on every call).

    Returns
    -------
    e_t, e_y : the two error vectors
    """
    if featurizer is None:
        featurizer = FunctionTransformer()
    if W is None:
        W = np.empty((shape(X)[0], 0))
    if a_W is None:
        a_W = np.zeros((shape(W)[1], ))

    # to correctly recover coefficients for T via OLS, we need e_t to be orthogonal to [W;X]
    WX = hstack([W, X])
    e_t = rand_sol(WX.T, np.zeros((shape(WX)[1], )))

    # to correctly recover coefficients for Y via OLS, we need ([X; W]⊗[1; ϕ(X); W])⁺ e_y =
    #                                                          -([X; W]⊗[1; ϕ(X); W])⁺ ((ϕ(X)⊗e_t)a_X+(W⊗e_t)a_W)
    # then, to correctly recover a in the third stage, we additionally need (ϕ(X)⊗e_t)ᵀ e_y = 0
    ϕ = featurizer.fit_transform(X)

    v_X = cross_product(ϕ, e_t)
    v_W = cross_product(W, e_t)

    M = np.linalg.pinv(
        cross_product(WX, hstack([np.ones((shape(WX)[0], 1)), ϕ, W])))
    e_y = rand_sol(
        vstack([M, v_X.T]),
        vstack([-M @ (v_X @ a_X + v_W @ a_W), np.zeros((shape(v_X)[1], ))]))

    return e_t, e_y
def test_2sls_shape(self):
    """Check effect and marginal-effect output shapes over a grid of T/Y/X/Z dimensions."""
    n = 100
    d_w = 1

    def make_random(d):
        # a negative dimension requests a 1-D vector instead of a column matrix
        if d >= 0:
            return np.random.normal(size=(n, d))
        return np.random.normal(size=(n,))

    for d_t in [-1, 1, 2]:
        n_t = d_t if d_t > 0 else 1
        for d_y in [-1, 1, 2]:
            for d_x in [1, 5]:
                for d_z in [1, 2]:
                    # only run when there are at least as many instruments as treatments
                    if d_z < n_t:
                        continue
                    T, Y, X, Z, W = (make_random(dim) for dim in (d_t, d_y, d_x, d_z, d_w))
                    est = NonparametricTwoStageLeastSquares(
                        t_featurizer=PolynomialFeatures(),
                        x_featurizer=PolynomialFeatures(),
                        z_featurizer=PolynomialFeatures(),
                        dt_featurizer=DPolynomialFeatures())
                    est.fit(Y, T, X=X, W=W, Z=Z)

                    eff = est.effect(X)
                    marg_eff = est.marginal_effect(T, X)

                    # vector-valued Y or T (negative dimension) contributes no axis
                    y_axis = (d_y,) if d_y > 0 else ()
                    t_axis = (d_t,) if d_t > 0 else ()
                    effect_shape = (n,) + y_axis
                    marginal_effect_shape = (n if d_x else 1,) + y_axis + t_axis

                    self.assertEqual(shape(marg_eff), marginal_effect_shape)
                    self.assertEqual(shape(eff), effect_shape)
def test_2sls(self):
    """Fit the nonparametric 2SLS estimator with several Hermite degrees and print losses and marginal effects."""
    n = 50000
    col = (n, 1)
    uniform = np.random.uniform

    # training data: e is the noise term added to x and y and scaled by the instrument z in p
    e = uniform(low=-0.5, high=0.5, size=col)
    z = uniform(size=col)
    x = uniform(size=col) + e
    p = x + z * e + uniform(size=col)
    y = p * x + e

    losses, marg_effs = [], []

    # fresh sample used to evaluate the fitted effect
    z_fresh = uniform(size=col)
    e_fresh = uniform(low=-0.5, high=0.5, size=col)
    x_fresh = uniform(size=col) + e_fresh
    p_fresh = x_fresh + z_fresh * e_fresh + uniform(size=col)

    degree_grid = [(0, 0, 0), (1, 1, 1), (5, 5, 5), (10, 10, 10), (3, 3, 10), (10, 10, 3)]
    for dt, dx, dz in degree_grid:
        np2sls = NonparametricTwoStageLeastSquares(
            HermiteFeatures(dt),
            HermiteFeatures(dx),
            HermiteFeatures(dz),
            HermiteFeatures(dt, shift=1))
        np2sls.fit(y, p, x, z)

        baseline = np.zeros(shape(p_fresh))
        effect = np2sls.effect(x_fresh, baseline, p_fresh)
        squared_error = np.square(p_fresh * x_fresh - effect)
        losses.append(np.mean(squared_error))

        t_points = np.array([[0.3], [0.5], [0.7]])
        x_points = np.array([[0.4], [0.6], [0.2]])
        marg_effs.append(np2sls.marginal_effect(t_points, x_points))

    print("losses: {}".format(losses))
    print("marg_effs: {}".format(marg_effs))
def fit(self, X, y, sample_weight=None):
    """
    Fit the (optionally weighted) least squares model.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        Training data
    y : array_like, shape (n_samples, 1) or (n_samples,)
        Target values
    sample_weight : array_like, shape (n_samples,)
        Individual weights for each sample

    Returns
    -------
    self
    """
    n_dims = ndim(y)
    assert n_dims == 1 or (n_dims == 2 and shape(y)[1] == 1)
    # always work with a flat target vector
    y = reshape(y, (-1,))

    if self.fit_intercept:
        X = add_constant(X, has_constant='add')

    # only forward weights when the caller supplied them
    wls_kwargs = {'hasconst': self.fit_intercept}
    if sample_weight is not None:
        wls_kwargs['weights'] = sample_weight

    self.results = WLS(y, X, **wls_kwargs).fit(**self.fit_args)
    return self
def test_hermite_shape(self):
    """Check the output shape of HermiteFeatures for several degrees, shifts and joint settings."""
    degree_shift_pairs = [(3, 0), (4, 2)]
    sample_shapes = [(5, 1), (7, 3)]
    for d, s in degree_shift_pairs:
        for j in [True, False]:
            for n, x in sample_shapes:
                # joint features multiply across columns; otherwise they are concatenated per column
                if j:
                    last_dim = (d + 1)**x
                else:
                    last_dim = (d + 1) * x
                # each unit of shift adds one axis of length x
                correct_shape = (n,) + (x,) * s + (last_dim,)
                transformed = HermiteFeatures(d, s, j).fit_transform(np.zeros((n, x)))
                output_shape = shape(transformed)
                assert output_shape == correct_shape
def fit(self, X, y, sample_weight=None):
    """
    Fit the wrapped cross-validated weighted Lasso model.

    A 2-D target with more than one column selects the multi-task model; any other
    target selects the single-task model, with an ``(n, 1)`` target flattened first
    (recorded in ``self.needs_unravel``). The fitted attributes of the wrapped model
    are then copied onto this object.

    Returns
    -------
    self
    """
    self.needs_unravel = False
    is_matrix = ndim(y) == 2

    if is_matrix and shape(y)[1] > 1:
        self.model = WeightedMultiTaskLassoCV(*self.args, **self.kwargs)
    else:
        if is_matrix and shape(y)[1] == 1:
            # single-column target: flatten it for the single-task model
            y = np.ravel(y)
            self.needs_unravel = True
        self.model = WeightedLassoCV(*self.args, **self.kwargs)

    self.model.fit(X, y, sample_weight)

    # mirror the fitted attributes of the wrapped model
    for attr in ('intercept_', 'coef_', 'alpha_', 'alphas_', 'n_iter_'):
        setattr(self, attr, getattr(self.model, attr))
    return self
def test_hermite_results(self):
    """Compare HermiteFeatures output against hand-written polynomials times a Gaussian factor."""
    inputs = np.random.normal(size=(5, 1))
    ones = np.ones(shape(inputs))

    # first polynomials are 1, x, x*x-1, x*x*x-3*x
    expected_polys = np.hstack([ones,
                                inputs,
                                inputs * inputs - ones,
                                inputs * inputs * inputs - 3 * inputs])
    features = HermiteFeatures(3).fit_transform(inputs)
    assert(np.allclose(features, expected_polys * np.exp(-inputs * inputs / 2)))

    for joint in [True, False]:
        features = HermiteFeatures(1, shift=1, joint=joint).fit_transform(inputs)
        # first derivatives are -x, -x^2+1 (since there's just one column, joint-ness doesn't matter)
        expected_polys = np.hstack([-inputs, -inputs * inputs + ones])
        expected = reshape(expected_polys * np.exp(-inputs * inputs / 2), (5, 1, 2))
        assert(np.allclose(features, expected))
def test_2sls(self):
    """Fit SieveTSLS with controls over several Hermite degrees and print losses and marginal effects."""
    n = 50000
    d_w, d_z, d_x, d_t, d_y = 2, 1, 1, 1, 1
    uniform = np.random.uniform
    normal = np.random.normal

    # training data
    e = uniform(low=-0.5, high=0.5, size=(n, d_x))
    z = uniform(size=(n, 1))
    w = uniform(size=(n, d_w))
    a = normal(size=(d_w, d_t))
    b = normal(size=(d_w, d_y))
    x = uniform(size=(n, d_x)) + e
    p = x + z * e + w @ a + uniform(size=(n, d_t))
    y = p * x + e + w @ b

    losses, marg_effs = [], []

    # fresh evaluation sample
    z_fresh = uniform(size=(n, d_z))
    e_fresh = uniform(low=-0.5, high=0.5, size=(n, d_x))
    x_fresh = uniform(size=(n, d_x)) + e_fresh
    # drawn but not used below; kept so the sequence of random draws is unchanged
    w_fresh = uniform(size=(n, d_w))
    p_fresh = x_fresh + z_fresh * e_fresh + uniform(size=(n, d_t))

    degree_grid = [(0, 0, 0), (1, 1, 1), (5, 5, 5), (10, 10, 10), (3, 3, 10), (10, 10, 3)]
    for dt, dx, dz in degree_grid:
        np2sls = SieveTSLS(t_featurizer=HermiteFeatures(dt),
                           x_featurizer=HermiteFeatures(dx),
                           z_featurizer=HermiteFeatures(dz),
                           dt_featurizer=HermiteFeatures(dt, shift=1))
        np2sls.fit(y, p, X=x, W=w, Z=z)

        baseline = np.zeros(shape(p_fresh))
        effect = np2sls.effect(x_fresh, baseline, p_fresh)
        squared_error = np.square(p_fresh * x_fresh - effect)
        losses.append(np.mean(squared_error))

        t_points = np.array([[0.3], [0.5], [0.7]])
        x_points = np.array([[0.4], [0.6], [0.2]])
        marg_effs.append(np2sls.marginal_effect(t_points, x_points))

    print("losses: {}".format(losses))
    print("marg_effs: {}".format(marg_effs))
def test_cate_api(self):
    """
    Test that we correctly implement the CATE API.

    Sweeps treatment/outcome/feature/control dimensions, discrete vs. continuous
    treatments, featurizers and inference options for DynamicDML on panel data,
    checking that the estimator can be pickled and that every effect, interval
    and inference result has the expected shape.
    """
    n_panels = 100  # number of panels
    n_periods = 3  # number of time periods per panel
    n = n_panels * n_periods
    # panel id of each row: every panel id is repeated n_periods times
    groups = np.repeat(a=np.arange(n_panels), repeats=n_periods, axis=0)

    def make_random(n, is_discrete, d):
        # d is None -> no array; negative d -> 1-D vector; otherwise an (n, d) matrix
        if d is None:
            return None
        sz = (n, d) if d >= 0 else (n,)
        if is_discrete:
            return np.random.choice(['a', 'b', 'c'], size=sz)
        else:
            return np.random.normal(size=sz)

    for d_t in [2, 1, -1]:
        for is_discrete in [True, False] if d_t <= 1 else [False]:
            # for is_discrete in [False]:
            for d_y in [3, 1, -1]:
                for d_x in [2, None]:
                    for d_w in [2, None]:
                        W, X, Y, T = [make_random(n, is_discrete, d)
                                      for is_discrete, d in [(False, d_w),
                                                             (False, d_x),
                                                             (False, d_y),
                                                             (is_discrete, d_t)]]
                        # treatment matrix passed to the effect methods: T repeated
                        # n_periods times side by side (column-ized first when T is 1-D)
                        T_test = np.hstack([(T.reshape(-1, 1) if d_t == -1 else T) for i in range(n_periods)])
                        for featurizer, fit_cate_intercept in\
                            [(None, True),
                             (PolynomialFeatures(degree=2, include_bias=False), True),
                             (PolynomialFeatures(degree=2, include_bias=True), False)]:

                            d_t_final = (2 if is_discrete else max(d_t, 1)) * n_periods

                            effect_shape = (n,) + ((d_y,) if d_y > 0 else ())
                            effect_summaryframe_shape = (n * (d_y if d_y > 0 else 1), 6)
                            marginal_effect_shape = ((n,) +
                                                     ((d_y,) if d_y > 0 else ()) +
                                                     ((d_t_final,) if d_t_final > 0 else ()))
                            marginal_effect_summaryframe_shape = (n * (d_y if d_y > 0 else 1) *
                                                                  (d_t_final if d_t_final > 0 else 1), 6)

                            # since T isn't passed to const_marginal_effect, defaults to one row if X is None
                            const_marginal_effect_shape = ((n if d_x else 1,) +
                                                           ((d_y,) if d_y > 0 else ()) +
                                                           ((d_t_final,) if d_t_final > 0 else()))
                            const_marginal_effect_summaryframe_shape = (
                                (n if d_x else 1) * (d_y if d_y > 0 else 1) *
                                (d_t_final if d_t_final > 0 else 1), 6)

                            # trailing shape of the featurized X ((0,) when there is no X)
                            fd_x = featurizer.fit_transform(X).shape[1:] if featurizer and d_x\
                                else ((d_x,) if d_x else (0,))
                            coef_shape = Y.shape[1:] + (d_t_final, ) + fd_x

                            coef_summaryframe_shape = (
                                (d_y if d_y > 0 else 1) *
                                (fd_x[0] if fd_x[0] > 0 else 1) * (d_t_final), 6)
                            intercept_shape = Y.shape[1:] + (d_t_final, )
                            intercept_summaryframe_shape = (
                                (d_y if d_y > 0 else 1) * (d_t_final if d_t_final > 0 else 1), 6)

                            all_infs = [None, 'auto', BootstrapInference(2)]
                            est = DynamicDML(model_y=Lasso() if d_y < 1 else MultiTaskLasso(),
                                             model_t=LogisticRegression() if is_discrete else
                                             (Lasso() if d_t < 1 else MultiTaskLasso()),
                                             featurizer=featurizer,
                                             fit_cate_intercept=fit_cate_intercept,
                                             discrete_treatment=is_discrete)

                            # ensure we can serialize the unfit estimator
                            pickle.dumps(est)

                            for inf in all_infs:
                                with self.subTest(d_w=d_w, d_x=d_x, d_y=d_y, d_t=d_t,
                                                  is_discrete=is_discrete, est=est, inf=inf):

                                    # with neither features nor a CATE intercept, fitting is
                                    # expected to raise AttributeError
                                    if X is None and (not fit_cate_intercept):
                                        with pytest.raises(AttributeError):
                                            est.fit(Y, T, X=X, W=W, groups=groups, inference=inf)
                                        continue

                                    est.fit(Y, T, X=X, W=W, groups=groups, inference=inf)

                                    # ensure we can pickle the fit estimator
                                    pickle.dumps(est)

                                    # make sure we can call the marginal_effect and effect methods
                                    const_marg_eff = est.const_marginal_effect(X)
                                    marg_eff = est.marginal_effect(T_test, X)
                                    self.assertEqual(shape(marg_eff), marginal_effect_shape)
                                    self.assertEqual(shape(const_marg_eff), const_marginal_effect_shape)

                                    np.testing.assert_allclose(
                                        marg_eff if d_x else marg_eff[0:1], const_marg_eff)

                                    assert len(est.score_) == n_periods
                                    for score in est.nuisance_scores_y[0]:
                                        assert score.shape == (n_periods, )
                                    for score in est.nuisance_scores_t[0]:
                                        assert score.shape == (n_periods, n_periods)

                                    T0 = np.full_like(T_test, 'a') if is_discrete else np.zeros_like(T_test)
                                    eff = est.effect(X, T0=T0, T1=T_test)
                                    self.assertEqual(shape(eff), effect_shape)

                                    self.assertEqual(shape(est.coef_), coef_shape)
                                    if fit_cate_intercept:
                                        self.assertEqual(shape(est.intercept_), intercept_shape)
                                    else:
                                        # no intercept was fit, so the attribute access itself must fail
                                        with pytest.raises(AttributeError):
                                            self.assertEqual(shape(est.intercept_), intercept_shape)

                                    if inf is not None:
                                        const_marg_eff_int = est.const_marginal_effect_interval(X)
                                        marg_eff_int = est.marginal_effect_interval(T_test, X)
                                        self.assertEqual(shape(marg_eff_int),
                                                         (2,) + marginal_effect_shape)
                                        self.assertEqual(shape(const_marg_eff_int),
                                                         (2,) + const_marginal_effect_shape)
                                        self.assertEqual(shape(est.effect_interval(X, T0=T0, T1=T_test)),
                                                         (2,) + effect_shape)
                                        self.assertEqual(shape(est.coef__interval()),
                                                         (2,) + coef_shape)
                                        if fit_cate_intercept:
                                            self.assertEqual(shape(est.intercept__interval()),
                                                             (2,) + intercept_shape)
                                        else:
                                            with pytest.raises(AttributeError):
                                                self.assertEqual(shape(est.intercept__interval()),
                                                                 (2,) + intercept_shape)

                                        const_marg_effect_inf = est.const_marginal_effect_inference(X)
                                        T1 = np.full_like(T_test, 'b') if is_discrete else T_test
                                        effect_inf = est.effect_inference(X, T0=T0, T1=T1)
                                        marg_effect_inf = est.marginal_effect_inference(T_test, X)

                                        # test const marginal inference
                                        self.assertEqual(shape(const_marg_effect_inf.summary_frame()),
                                                         const_marginal_effect_summaryframe_shape)
                                        self.assertEqual(shape(const_marg_effect_inf.point_estimate),
                                                         const_marginal_effect_shape)
                                        self.assertEqual(shape(const_marg_effect_inf.stderr),
                                                         const_marginal_effect_shape)
                                        self.assertEqual(shape(const_marg_effect_inf.var),
                                                         const_marginal_effect_shape)
                                        self.assertEqual(shape(const_marg_effect_inf.pvalue()),
                                                         const_marginal_effect_shape)
                                        self.assertEqual(shape(const_marg_effect_inf.zstat()),
                                                         const_marginal_effect_shape)
                                        self.assertEqual(shape(const_marg_effect_inf.conf_int()),
                                                         (2,) + const_marginal_effect_shape)
                                        np.testing.assert_array_almost_equal(
                                            const_marg_effect_inf.conf_int()[0],
                                            const_marg_eff_int[0], decimal=5)
                                        const_marg_effect_inf.population_summary()._repr_html_()

                                        # test effect inference
                                        self.assertEqual(shape(effect_inf.summary_frame()),
                                                         effect_summaryframe_shape)
                                        self.assertEqual(shape(effect_inf.point_estimate),
                                                         effect_shape)
                                        self.assertEqual(shape(effect_inf.stderr),
                                                         effect_shape)
                                        self.assertEqual(shape(effect_inf.var),
                                                         effect_shape)
                                        self.assertEqual(shape(effect_inf.pvalue()),
                                                         effect_shape)
                                        self.assertEqual(shape(effect_inf.zstat()),
                                                         effect_shape)
                                        self.assertEqual(shape(effect_inf.conf_int()),
                                                         (2,) + effect_shape)
                                        np.testing.assert_array_almost_equal(
                                            effect_inf.conf_int()[0],
                                            est.effect_interval(X, T0=T0, T1=T1)[0], decimal=5)
                                        effect_inf.population_summary()._repr_html_()

                                        # test marginal effect inference
                                        self.assertEqual(shape(marg_effect_inf.summary_frame()),
                                                         marginal_effect_summaryframe_shape)
                                        self.assertEqual(shape(marg_effect_inf.point_estimate),
                                                         marginal_effect_shape)
                                        self.assertEqual(shape(marg_effect_inf.stderr),
                                                         marginal_effect_shape)
                                        self.assertEqual(shape(marg_effect_inf.var),
                                                         marginal_effect_shape)
                                        self.assertEqual(shape(marg_effect_inf.pvalue()),
                                                         marginal_effect_shape)
                                        self.assertEqual(shape(marg_effect_inf.zstat()),
                                                         marginal_effect_shape)
                                        self.assertEqual(shape(marg_effect_inf.conf_int()),
                                                         (2,) + marginal_effect_shape)
                                        np.testing.assert_array_almost_equal(
                                            marg_effect_inf.conf_int()[0], marg_eff_int[0], decimal=5)
                                        marg_effect_inf.population_summary()._repr_html_()

                                        # test coef__inference and intercept__inference
                                        if X is not None:
                                            self.assertEqual(
                                                shape(est.coef__inference().summary_frame()),
                                                coef_summaryframe_shape)
                                            np.testing.assert_array_almost_equal(
                                                est.coef__inference().conf_int()
                                                [0], est.coef__interval()[0], decimal=5)

                                        if fit_cate_intercept:
                                            cm = ExitStack()
                                            # ExitStack can be used as a "do nothing" ContextManager
                                        else:
                                            cm = pytest.raises(AttributeError)
                                        with cm:
                                            self.assertEqual(shape(est.intercept__inference().
                                                                   summary_frame()),
                                                             intercept_summaryframe_shape)
                                            np.testing.assert_array_almost_equal(
                                                est.intercept__inference().conf_int()
                                                [0], est.intercept__interval()[0], decimal=5)

                                        # NOTE(review): nesting reconstructed from collapsed source;
                                        # summary() is placed after the `with cm` block so it runs for
                                        # every inference option - confirm against the original layout
                                        est.summary()

                                    est.score(Y, T, X, W, groups=groups)

                                    # make sure we can call effect with implied scalar treatments,
                                    # no matter the dimensions of T, and also that we warn when there
                                    # are multiple treatments
                                    if d_t > 1:
                                        cm = self.assertWarns(Warning)
                                    else:
                                        # ExitStack can be used as a "do nothing" ContextManager
                                        cm = ExitStack()
                                    with cm:
                                        effect_shape2 = (n if d_x else 1,) + ((d_y,) if d_y > 0 else())
                                        eff = est.effect(X) if not is_discrete else est.effect(
                                            X, T0='a', T1='b')
                                        self.assertEqual(shape(eff), effect_shape2)
def test_cate_api(self):
    """
    Test that we correctly implement the CATE API.

    Sweeps treatment/outcome/feature/instrument dimensions and discrete vs.
    continuous treatments and instruments over the IV estimators, checking that
    each estimator can be pickled and that effect, marginal-effect and interval
    outputs have the expected shapes.
    """
    n = 30

    def size(n, d):
        # negative d requests a 1-D vector
        return (n, d) if d >= 0 else (n, )

    def make_random(is_discrete, d):
        if d is None:
            return None
        sz = size(n, d)
        if is_discrete:
            while True:
                arr = np.random.choice(['a', 'b', 'c'], size=sz)
                # ensure that we've got at least two of every row
                _, counts = np.unique(arr, return_counts=True, axis=0)
                if len(counts) == 3**(d if d > 0 else 1) and counts.min() > 1:
                    return arr
        else:
            return np.random.normal(size=sz)

    def eff_shape(n, d_y):
        return (n, ) + ((d_y, ) if d_y > 0 else ())

    def marg_eff_shape(n, d_y, d_t_final):
        return ((n, ) +
                ((d_y, ) if d_y > 0 else ()) +
                ((d_t_final, ) if d_t_final > 0 else ()))

    # since T isn't passed to const_marginal_effect, defaults to one row if X is None
    def const_marg_eff_shape(n, d_x, d_y, d_t_final):
        return ((n if d_x else 1, ) +
                ((d_y, ) if d_y > 0 else ()) +
                ((d_t_final, ) if d_t_final > 0 else ()))

    for d_t in [2, 1, -1]:
        n_t = d_t if d_t > 0 else 1
        for discrete_t in [True, False] if n_t == 1 else [False]:
            for d_y in [3, 1, -1]:
                for d_q in [2, None]:
                    for d_z in [2, 1]:
                        # need at least as many instruments as treatments
                        if d_z < n_t:
                            continue
                        for discrete_z in [True, False] if d_z == 1 else [False]:
                            Z1, Q, Y, T1 = [
                                make_random(is_discrete, d)
                                for is_discrete, d in [(discrete_z, d_z),
                                                       (False, d_q),
                                                       (False, d_y),
                                                       (discrete_t, d_t)]
                            ]
                            if discrete_t and discrete_z:
                                # need to make sure we get all *joint* combinations
                                arr = make_random(True, 2)
                                # the two columns are sampled jointly: one becomes the
                                # instrument and the other the treatment (previously both
                                # were taken from column 0, making T identical to Z)
                                Z1 = arr[:, 0].reshape(size(n, d_z))
                                T1 = arr[:, 1].reshape(size(n, d_t))

                            d_t_final1 = 2 if discrete_t else d_t

                            if discrete_t:
                                # IntentToTreat only supports binary treatments/instruments
                                T2 = T1.copy()
                                T2[T1 == 'c'] = np.random.choice(
                                    ['a', 'b'], size=np.count_nonzero(T1 == 'c'))
                                d_t_final2 = 1
                            if discrete_z:
                                # IntentToTreat only supports binary treatments/instruments
                                Z2 = Z1.copy()
                                Z2[Z1 == 'c'] = np.random.choice(
                                    ['a', 'b'], size=np.count_nonzero(Z1 == 'c'))

                            effect_shape = eff_shape(n, d_y)

                            model_t = LogisticRegression() if discrete_t else Lasso()
                            model_z = LogisticRegression() if discrete_z else Lasso()

                            all_infs = [None, BootstrapInference(1)]

                            # (estimator, supports multiple outcomes, inference options)
                            estimators = [
                                (DMLATEIV(model_Y_W=Lasso(),
                                          model_T_W=model_t,
                                          model_Z_W=model_z,
                                          discrete_treatment=discrete_t,
                                          discrete_instrument=discrete_z),
                                 True,
                                 all_infs),
                                (ProjectedDMLATEIV(model_Y_W=Lasso(),
                                                   model_T_W=model_t,
                                                   model_T_WZ=model_t,
                                                   discrete_treatment=discrete_t,
                                                   discrete_instrument=discrete_z),
                                 False,
                                 all_infs),
                                (DMLIV(model_Y_X=Lasso(),
                                       model_T_X=model_t,
                                       model_T_XZ=model_t,
                                       model_final=Lasso(),
                                       discrete_treatment=discrete_t,
                                       discrete_instrument=discrete_z),
                                 False,
                                 all_infs)
                            ]

                            if d_q and discrete_t and discrete_z:
                                # IntentToTreat requires X
                                estimators.append((
                                    LinearIntentToTreatDRIV(model_Y_X=Lasso(),
                                                            model_T_XZ=model_t,
                                                            flexible_model_effect=WeightedLasso(),
                                                            cv=2),
                                    False,
                                    all_infs + ['auto']))

                            for est, multi, infs in estimators:
                                # parentheses spell out the precedence the bare expression
                                # already had: `and` binds tighter than `or`
                                # NOTE(review): as written this skips every multi-treatment and
                                # multi-instrument case for all estimators - confirm intended
                                if (not (multi) and d_y > 1) or d_t > 1 or d_z > 1:
                                    continue

                                # ensure we can serialize unfit estimator
                                pickle.dumps(est)

                                d_ws = [None]
                                if isinstance(est, LinearIntentToTreatDRIV):
                                    d_ws.append(2)

                                for d_w in d_ws:
                                    W = make_random(False, d_w)

                                    for inf in infs:
                                        with self.subTest(d_z=d_z, d_x=d_q, d_y=d_y, d_t=d_t,
                                                          discrete_t=discrete_t,
                                                          discrete_z=discrete_z,
                                                          est=est, inf=inf):
                                            Z = Z1
                                            T = T1
                                            d_t_final = d_t_final1
                                            X = Q
                                            d_x = d_q

                                            if isinstance(est, (DMLATEIV, ProjectedDMLATEIV)):
                                                # these support only W but not X
                                                W = Q
                                                X = None
                                                d_x = None

                                                def fit():
                                                    return est.fit(Y, T, Z=Z, W=W, inference=inf)

                                                def score():
                                                    return est.score(Y, T, Z=Z, W=W)
                                            else:
                                                # these support only binary, not general discrete T and Z
                                                if discrete_t:
                                                    T = T2
                                                    d_t_final = d_t_final2

                                                if discrete_z:
                                                    Z = Z2

                                                if isinstance(est, LinearIntentToTreatDRIV):
                                                    def fit():
                                                        return est.fit(Y, T, Z=Z, X=X, W=W, inference=inf)

                                                    def score():
                                                        return est.score(Y, T, Z=Z, X=X, W=W)
                                                else:
                                                    def fit():
                                                        return est.fit(Y, T, Z=Z, X=X, inference=inf)

                                                    def score():
                                                        return est.score(Y, T, Z=Z, X=X)

                                            marginal_effect_shape = marg_eff_shape(n, d_y, d_t_final)
                                            const_marginal_effect_shape = const_marg_eff_shape(
                                                n, d_x, d_y, d_t_final)

                                            fit()

                                            # ensure we can serialize fit estimator
                                            pickle.dumps(est)

                                            # make sure we can call the marginal_effect and effect methods
                                            const_marg_eff = est.const_marginal_effect(X)
                                            marg_eff = est.marginal_effect(T, X)
                                            self.assertEqual(shape(marg_eff), marginal_effect_shape)
                                            self.assertEqual(shape(const_marg_eff),
                                                             const_marginal_effect_shape)

                                            np.testing.assert_array_equal(
                                                marg_eff if d_x else marg_eff[0:1], const_marg_eff)

                                            T0 = np.full_like(T, 'a') if discrete_t else np.zeros_like(T)
                                            eff = est.effect(X, T0=T0, T1=T)
                                            self.assertEqual(shape(eff), effect_shape)

                                            # TODO: add tests for extra properties like coef_ where they exist

                                            if inf is not None:
                                                const_marg_eff_int = est.const_marginal_effect_interval(X)
                                                marg_eff_int = est.marginal_effect_interval(T, X)
                                                self.assertEqual(shape(marg_eff_int),
                                                                 (2, ) + marginal_effect_shape)
                                                self.assertEqual(shape(const_marg_eff_int),
                                                                 (2, ) + const_marginal_effect_shape)
                                                self.assertEqual(
                                                    shape(est.effect_interval(X, T0=T0, T1=T)),
                                                    (2, ) + effect_shape)

                                            # TODO: add tests for extra properties like coef_ where they exist

                                            score()

                                            # make sure we can call effect with implied scalar treatments,
                                            # no matter the dimensions of T, and also that we warn when there
                                            # are multiple treatments
                                            if d_t > 1:
                                                cm = self.assertWarns(Warning)
                                            else:
                                                # ExitStack can be used as a "do nothing" ContextManager
                                                cm = ExitStack()
                                            with cm:
                                                effect_shape2 = (n if d_x else 1, ) + (
                                                    (d_y, ) if d_y > 0 else ())
                                                eff = est.effect(X) if not discrete_t else est.effect(
                                                    X, T0='a', T1='b')
                                                self.assertEqual(shape(eff), effect_shape2)
def test_cate_api(self):
    """Check that the IV estimators can be fit, pickled and queried, and that outputs have the expected shapes."""
    def const_marg_eff_shape(n, d_x, d_y, binary_T):
        # one row when there are no features; trailing axis of size 1 for a binary treatment
        rows = (n if d_x else 1, )
        outcome_axis = (d_y, ) if d_y > 1 else ()
        treatment_axis = (1, ) if binary_T else ()
        return rows + outcome_axis + treatment_axis

    def marg_eff_shape(n, d_y, binary_T):
        outcome_axis = (d_y, ) if d_y > 1 else ()
        treatment_axis = (1, ) if binary_T else ()
        return (n, ) + outcome_axis + treatment_axis

    def eff_shape(n, d_x, d_y):
        outcome_axis = (d_y, ) if d_y > 1 else ()
        return (n if d_x else 1, ) + outcome_axis

    n = 1000
    y = np.random.normal(size=(n, ))
    for d_y in [1, 3]:
        y = np.random.normal(size=(n, ) if d_y == 1 else (n, d_y))
        for d_w in [None, 10]:
            W = None if d_w is None else np.random.normal(size=(n, d_w))
            for d_x in [None, 3]:
                X = None if d_x is None else np.random.normal(size=(n, d_x))
                for binary_Z in [True, False]:
                    if binary_Z:
                        Z = np.random.choice([3, 4], size=(n, ))
                    else:
                        Z = np.random.normal(1, 3, size=(n, ))
                    for binary_T in [True, False]:
                        if binary_T:
                            T = np.random.choice([0, 1], size=(n, ))
                        else:
                            T = np.random.uniform(1, 3, size=(n, )) + 0.5 * Z
                        featurizers = [
                            None,
                            PolynomialFeatures(degree=2, include_bias=False),
                        ]
                        for featurizer in featurizers:
                            # keyword arguments shared by every estimator under test
                            shared = dict(featurizer=featurizer,
                                          discrete_treatment=binary_T,
                                          discrete_instrument=binary_Z)
                            est_list = [
                                OrthoIV(projection=False, **shared),
                                OrthoIV(projection=True, **shared),
                                DMLIV(model_final=LinearRegression(fit_intercept=False), **shared),
                                NonParamDMLIV(model_final=RandomForestRegressor(), **shared),
                            ]
                            # the last (nonparametric) estimator is dropped when there are no features
                            if X is None:
                                est_list = est_list[:-1]

                            for est in est_list:
                                with self.subTest(d_w=d_w, d_x=d_x, binary_T=binary_T,
                                                  binary_Z=binary_Z, featurizer=featurizer, est=est):
                                    # ensure we can serialize unfit estimator
                                    pickle.dumps(est)
                                    est.fit(y, T, Z=Z, X=X, W=W)
                                    # ensure we can serialize fit estimator
                                    pickle.dumps(est)

                                    # expected effect size
                                    expected_const = const_marg_eff_shape(n, d_x, d_y, binary_T)
                                    expected_marg = marg_eff_shape(n, d_y, binary_T)
                                    expected_eff = eff_shape(n, d_x, d_y)

                                    # test effect
                                    const_marg_eff = est.const_marginal_effect(X)
                                    self.assertEqual(shape(const_marg_eff), expected_const)
                                    marg_eff = est.marginal_effect(T, X)
                                    self.assertEqual(shape(marg_eff), expected_marg)
                                    eff = est.effect(X, T0=0, T1=1)
                                    self.assertEqual(shape(eff), expected_eff)

                                    # test inference
                                    # only OrthoIV support inference other than bootstrap
                                    if isinstance(est, OrthoIV):
                                        const_marg_eff_int = est.const_marginal_effect_interval(X)
                                        marg_eff_int = est.marginal_effect_interval(T, X)
                                        eff_int = est.effect_interval(X, T0=0, T1=1)
                                        self.assertEqual(shape(const_marg_eff_int),
                                                         (2, ) + expected_const)
                                        self.assertEqual(shape(marg_eff_int),
                                                         (2, ) + expected_marg)
                                        self.assertEqual(shape(eff_int),
                                                         (2, ) + expected_eff)

                                    # test summary
                                    if isinstance(est, (OrthoIV, DMLIV)):
                                        est.summary()

                                    # test can run score
                                    est.score(y, T, Z, X=X, W=W)

                                    if X is None:
                                        continue

                                    # test cate_feature_names
                                    if featurizer:
                                        expect_feat_len = featurizer.fit(X).n_output_features_
                                    else:
                                        expect_feat_len = d_x
                                    self.assertEqual(len(est.cate_feature_names()), expect_feat_len)

                                    # test can run shap values
                                    est.shap_values(X[:10])
def test_cate_api(self):
    """Check that the DML estimators implement the CATE API with the expected output shapes."""
    n = 20

    def make_random(is_discrete, d):
        # None -> no array; negative d -> 1-D vector; otherwise an (n, d) matrix
        if d is None:
            return None
        sz = (n, d) if d >= 0 else (n, )
        if not is_discrete:
            return np.random.normal(size=sz)
        while True:
            arr = np.random.choice(['a', 'b', 'c'], size=sz)
            # ensure that we've got at least two of every element
            _, counts = np.unique(arr, return_counts=True)
            if len(counts) == 3 and counts.min() > 1:
                return arr

    for d_t in [2, 1, -1]:
        for is_discrete in [True, False] if d_t <= 1 else [False]:
            for d_y in [3, 1, -1]:
                for d_x in [2, None]:
                    for d_w in [2, None]:
                        W, X, Y, T = [
                            make_random(discrete, dim)
                            for discrete, dim in [(False, d_w),
                                                  (False, d_x),
                                                  (False, d_y),
                                                  (is_discrete, d_t)]
                        ]

                        d_t_final = 2 if is_discrete else d_t

                        # vector-valued Y or T contributes no axis
                        y_axis = (d_y, ) if d_y > 0 else ()
                        t_axis = (d_t_final, ) if d_t_final > 0 else ()
                        effect_shape = (n, ) + y_axis
                        marginal_effect_shape = (n, ) + y_axis + t_axis
                        # since T isn't passed to const_marginal_effect, defaults to one row if X is None
                        const_marginal_effect_shape = (n if d_x else 1, ) + y_axis + t_axis

                        if is_discrete:
                            model_t = LogisticRegression()
                        else:
                            model_t = Lasso()

                        # TODO: add stratification to bootstrap so that we can use it even with discrete treatments
                        all_infs = [None, 'statsmodels']
                        if not is_discrete:
                            all_infs.append(BootstrapInference(1))

                        # (estimator, supports multiple outcomes, inference options)
                        candidates = [
                            (LinearDMLCateEstimator(model_y=Lasso(),
                                                    model_t='auto',
                                                    discrete_treatment=is_discrete),
                             False,
                             all_infs),
                            (SparseLinearDMLCateEstimator(model_y=LinearRegression(),
                                                          model_t=model_t,
                                                          discrete_treatment=is_discrete),
                             True,
                             [None]),
                            (KernelDMLCateEstimator(model_y=LinearRegression(),
                                                    model_t=model_t,
                                                    discrete_treatment=is_discrete),
                             False,
                             [None])
                        ]
                        for est, multi, infs in candidates:
                            if not (multi) and d_y > 1:
                                continue

                            for inf in infs:
                                with self.subTest(d_w=d_w, d_x=d_x, d_y=d_y, d_t=d_t,
                                                  is_discrete=is_discrete, est=est, inf=inf):
                                    est.fit(Y, T, X, W, inference=inf)

                                    # make sure we can call the marginal_effect and effect methods
                                    const_marg_eff = est.const_marginal_effect(X)
                                    marg_eff = est.marginal_effect(T, X)
                                    self.assertEqual(shape(marg_eff), marginal_effect_shape)
                                    self.assertEqual(shape(const_marg_eff), const_marginal_effect_shape)

                                    np.testing.assert_array_equal(
                                        marg_eff if d_x else marg_eff[0:1], const_marg_eff)

                                    if is_discrete:
                                        T0 = np.full_like(T, 'a')
                                    else:
                                        T0 = np.zeros_like(T)
                                    eff = est.effect(X, T0=T0, T1=T)
                                    self.assertEqual(shape(eff), effect_shape)

                                    if inf is not None:
                                        const_marg_eff_int = est.const_marginal_effect_interval(X)
                                        marg_eff_int = est.marginal_effect_interval(T, X)
                                        self.assertEqual(shape(marg_eff_int),
                                                         (2, ) + marginal_effect_shape)
                                        self.assertEqual(shape(const_marg_eff_int),
                                                         (2, ) + const_marginal_effect_shape)
                                        self.assertEqual(shape(est.effect_interval(X, T0=T0, T1=T)),
                                                         (2, ) + effect_shape)

                                    est.score(Y, T, X, W)

                                    # make sure we can call effect with implied scalar treatments, no matter the
                                    # dimensions of T, and also that we warn when there are multiple treatments
                                    if d_t > 1:
                                        cm = self.assertWarns(Warning)
                                    else:
                                        cm = ExitStack()  # ExitStack can be used as a "do nothing" ContextManager
                                    with cm:
                                        effect_shape2 = (n if d_x else 1, ) + y_axis
                                        if is_discrete:
                                            eff = est.effect(X, T0='a', T1='b')
                                        else:
                                            eff = est.effect(X)
                                        self.assertEqual(shape(eff), effect_shape2)
def test_cate_api(self):
    """
    Test that we correctly implement the CATE API.

    Sweeps outcome/treatment/feature/control dimensions for LinearDRLearner and
    DRLearner with a discrete treatment, checking that effect, marginal-effect,
    interval and inference outputs have the expected shapes.
    """
    n = 20

    def make_random(is_discrete, d):
        # None -> no array; d <= 0 -> 1-D vector; otherwise an (n, d) matrix
        if d is None:
            return None
        sz = (n, d) if d > 0 else (n,)
        if is_discrete:
            while True:
                arr = np.random.choice(['a', 'b', 'c'], size=sz)
                # ensure that we've got at least two of every element
                _, counts = np.unique(arr, return_counts=True)
                if len(counts) == 3 and counts.min() > 1:
                    return arr
        else:
            return np.random.normal(size=sz)

    for d_y in [0, 1]:
        # only discrete treatments are exercised in this test
        is_discrete = True
        for d_t in [0, 1]:
            for d_x in [2, None]:
                for d_w in [2, None]:
                    W, X, Y, T = [make_random(is_discrete, d)
                                  for is_discrete, d in [(False, d_w),
                                                         (False, d_x),
                                                         (False, d_y),
                                                         (is_discrete, d_t)]]

                    # the case with neither features nor controls is skipped
                    if (X is None) and (W is None):
                        continue
                    d_t_final = 2 if is_discrete else d_t

                    effect_shape = (n,) + ((d_y,) if d_y > 0 else ())
                    effect_summaryframe_shape = (
                        n * (d_y if d_y > 0 else 1), 6)
                    marginal_effect_shape = ((n,) +
                                             ((d_y,) if d_y > 0 else ()) +
                                             ((d_t_final,) if d_t_final > 0 else ()))
                    marginal_effect_summaryframe_shape = (n * (d_y if d_y > 0 else 1),
                                                          6 * (d_t_final if d_t_final > 0 else 1))

                    # since T isn't passed to const_marginal_effect, defaults to one row if X is None
                    const_marginal_effect_shape = ((n if d_x else 1,) +
                                                   ((d_y,) if d_y > 0 else ()) +
                                                   ((d_t_final,) if d_t_final > 0 else()))
                    const_marginal_effect_summaryframe_shape = (
                        (n if d_x else 1) * (d_y if d_y > 0 else 1),
                        6 * (d_t_final if d_t_final > 0 else 1))

                    for est in [LinearDRLearner(model_propensity=LogisticRegression(C=1000, solver='lbfgs',
                                                                                    multi_class='auto')),
                                DRLearner(model_propensity=LogisticRegression(multi_class='auto'),
                                          model_regression=LinearRegression(),
                                          model_final=StatsModelsLinearRegression(),
                                          multitask_model_final=True)]:

                        # TODO: add stratification to bootstrap so that we can use it even with discrete treatments
                        infs = [None]
                        if isinstance(est, LinearDRLearner):
                            infs.append('statsmodels')

                        for inf in infs:
                            with self.subTest(d_w=d_w, d_x=d_x, d_y=d_y, d_t=d_t,
                                              is_discrete=is_discrete, est=est, inf=inf):
                                est.fit(Y, T, X, W, inference=inf)
                                # make sure we can call the marginal_effect and effect methods
                                const_marg_eff = est.const_marginal_effect(
                                    X)
                                marg_eff = est.marginal_effect(T, X)
                                self.assertEqual(
                                    shape(marg_eff), marginal_effect_shape)
                                self.assertEqual(
                                    shape(const_marg_eff), const_marginal_effect_shape)

                                np.testing.assert_array_equal(
                                    marg_eff if d_x else marg_eff[0:1], const_marg_eff)

                                T0 = np.full_like(T, 'a')
                                eff = est.effect(X, T0=T0, T1=T)
                                self.assertEqual(shape(eff), effect_shape)
                                if inf is not None:
                                    const_marg_eff_int = est.const_marginal_effect_interval(
                                        X)
                                    marg_eff_int = est.marginal_effect_interval(
                                        T, X)
                                    const_marg_effect_inf = est.const_marginal_effect_inference(
                                        X)
                                    T1 = np.full_like(T, 'b')
                                    effect_inf = est.effect_inference(
                                        X, T0=T0, T1=T1)
                                    marg_effect_inf = est.marginal_effect_inference(
                                        T, X)
                                    self.assertEqual(shape(marg_eff_int),
                                                     (2,) + marginal_effect_shape)
                                    self.assertEqual(shape(const_marg_eff_int),
                                                     (2,) + const_marginal_effect_shape)
                                    self.assertEqual(shape(est.effect_interval(X, T0=T0, T1=T)),
                                                     (2,) + effect_shape)

                                    # test const marginal inference
                                    self.assertEqual(shape(const_marg_effect_inf.summary_frame()),
                                                     const_marginal_effect_summaryframe_shape)
                                    self.assertEqual(shape(const_marg_effect_inf.point_estimate),
                                                     const_marginal_effect_shape)
                                    self.assertEqual(shape(const_marg_effect_inf.stderr),
                                                     const_marginal_effect_shape)
                                    self.assertEqual(shape(const_marg_effect_inf.var),
                                                     const_marginal_effect_shape)
                                    self.assertEqual(shape(const_marg_effect_inf.pvalue()),
                                                     const_marginal_effect_shape)
                                    self.assertEqual(shape(const_marg_effect_inf.zstat()),
                                                     const_marginal_effect_shape)
                                    self.assertEqual(shape(const_marg_effect_inf.conf_int()),
                                                     (2,) + const_marginal_effect_shape)
                                    np.testing.assert_array_almost_equal(const_marg_effect_inf.conf_int()
                                                                         [0], const_marg_eff_int[0], decimal=5)
                                    const_marg_effect_inf.population_summary()._repr_html_()

                                    # test effect inference
                                    self.assertEqual(shape(effect_inf.summary_frame()),
                                                     effect_summaryframe_shape)
                                    self.assertEqual(shape(effect_inf.point_estimate),
                                                     effect_shape)
                                    self.assertEqual(shape(effect_inf.stderr),
                                                     effect_shape)
                                    self.assertEqual(shape(effect_inf.var),
                                                     effect_shape)
                                    self.assertEqual(shape(effect_inf.pvalue()),
                                                     effect_shape)
                                    self.assertEqual(shape(effect_inf.zstat()),
                                                     effect_shape)
                                    self.assertEqual(shape(effect_inf.conf_int()),
                                                     (2,) + effect_shape)
                                    np.testing.assert_array_almost_equal(effect_inf.conf_int()
                                                                         [0], est.effect_interval(
                                                                             X, T0=T0, T1=T1)
                                                                         [0], decimal=5)
                                    effect_inf.population_summary()._repr_html_()

                                    # test marginal effect inference
                                    self.assertEqual(shape(marg_effect_inf.summary_frame()),
                                                     marginal_effect_summaryframe_shape)
                                    self.assertEqual(shape(marg_effect_inf.point_estimate),
                                                     marginal_effect_shape)
                                    self.assertEqual(shape(marg_effect_inf.stderr),
                                                     marginal_effect_shape)
                                    self.assertEqual(shape(marg_effect_inf.var),
                                                     marginal_effect_shape)
                                    self.assertEqual(shape(marg_effect_inf.pvalue()),
                                                     marginal_effect_shape)
                                    self.assertEqual(shape(marg_effect_inf.zstat()),
                                                     marginal_effect_shape)
                                    self.assertEqual(shape(marg_effect_inf.conf_int()),
                                                     (2,) + marginal_effect_shape)
                                    np.testing.assert_array_almost_equal(marg_effect_inf.conf_int()
                                                                         [0], marg_eff_int[0], decimal=5)
                                    marg_effect_inf.population_summary()._repr_html_()

                                est.score(Y, T, X, W)

                                # make sure we can call effect with implied scalar treatments, no matter the
                                # dimensions of T, and also that we warn when there are multiple treatments
                                # NOTE(review): d_t only takes the values 0 and 1 in this test, so the
                                # assertWarns branch below is never taken
                                if d_t > 1:
                                    cm = self.assertWarns(Warning)
                                else:
                                    cm = ExitStack()  # ExitStack can be used as a "do nothing" ContextManager
                                with cm:
                                    effect_shape2 = (
                                        n if d_x else 1,) + ((d_y,) if d_y > 0 else())
                                    eff = est.effect(X, T0='a', T1='b')
                                    self.assertEqual(
                                        shape(eff), effect_shape2)
def test_cate_api_nonparam(self):
    """Check the CATE API of the nonparametric DML estimators across input shapes.

    Every combination of treatment, outcome, feature and control dimensions
    (with discrete and continuous treatments) is fitted with each estimator
    and each inference option.  The shapes of the effects, the marginal
    effects and, when inference is enabled, their intervals are compared with
    the expected shapes.  Fitting with ``X`` set to ``None`` is expected to
    raise ``AttributeError``.
    """
    n = 20

    def make_random(is_discrete, d):
        """Return random data with ``n`` rows, or ``None`` when ``d`` is ``None``.

        A negative ``d`` yields a 1-D array of length ``n``; otherwise the
        result has shape ``(n, d)``.
        """
        if d is None:
            return None
        size = (n,) if d < 0 else (n, d)
        if not is_discrete:
            return np.random.normal(size=size)
        while True:
            labels = np.random.choice(['a', 'b'], size=size)
            # redraw until both labels are present and each occurs more than twice
            _, label_counts = np.unique(labels, return_counts=True)
            if len(label_counts) == 2 and label_counts.min() > 2:
                return labels

    def axis(dim):
        """Return ``(dim,)`` for a positive dimension and ``()`` otherwise."""
        return (dim,) if dim > 0 else ()

    for d_t in [1, -1]:
        discrete_choices = [True, False] if d_t <= 1 else [False]
        for is_discrete in discrete_choices:
            for d_y in [3, 1, -1]:
                for d_x in [2, None]:
                    for d_w in [2, None]:
                        # data is drawn in the order W, X, Y, T; only T may be discrete
                        W = make_random(False, d_w)
                        X = make_random(False, d_x)
                        Y = make_random(False, d_y)
                        T = make_random(is_discrete, d_t)

                        # a discrete treatment is counted as a single treatment column
                        d_t_final = 1 if is_discrete else d_t
                        y_axis = axis(d_y)
                        t_axis = axis(d_t_final)

                        effect_shape = (n,) + y_axis
                        marginal_effect_shape = (n,) + y_axis + t_axis
                        # since T isn't passed to const_marginal_effect,
                        # defaults to one row if X is None
                        const_marginal_effect_shape = (n if d_x else 1,) + y_axis + t_axis

                        if is_discrete:
                            model_t = LogisticRegression()
                        else:
                            model_t = WeightedLasso()

                        # TODO Add bootstrap inference, once discrete treatment issue is fixed
                        if is_discrete:
                            base_infs = [None]
                        else:
                            base_infs = [None, BootstrapInference(2)]

                        # the treatment model and the discreteness flag are common
                        # to every estimator built for this combination
                        shared = dict(model_t=model_t, discrete_treatment=is_discrete)
                        estimators = [
                            (NonParamDMLCateEstimator(model_y=WeightedLasso(),
                                                      model_final=WeightedLasso(),
                                                      featurizer=None,
                                                      **shared),
                             True, base_infs),
                            (NonParamDMLCateEstimator(model_y=WeightedLasso(),
                                                      model_final=WeightedLasso(),
                                                      featurizer=FunctionTransformer(),
                                                      **shared),
                             True, base_infs),
                            (ForestDMLCateEstimator(model_y=WeightedLasso(), **shared),
                             True, base_infs + ['blb']),
                        ]

                        for est, multi, infs in estimators:
                            # estimators flagged as single-outcome skip multi-column Y
                            if d_y > 1 and not multi:
                                continue

                            for inf in infs:
                                with self.subTest(d_w=d_w, d_x=d_x, d_y=d_y, d_t=d_t,
                                                  is_discrete=is_discrete, est=est, inf=inf):
                                    if X is None:
                                        # fitting without features must fail
                                        with pytest.raises(AttributeError):
                                            est.fit(Y, T, X, W, inference=inf)
                                        continue

                                    est.fit(Y, T, X, W, inference=inf)

                                    # make sure we can call the marginal_effect
                                    # and effect methods
                                    const_effect = est.const_marginal_effect(X)
                                    marginal = est.marginal_effect(T, X)
                                    self.assertEqual(shape(marginal),
                                                     marginal_effect_shape)
                                    self.assertEqual(shape(const_effect),
                                                     const_marginal_effect_shape)
                                    np.testing.assert_array_equal(
                                        marginal if d_x else marginal[:1],
                                        const_effect)

                                    # baseline treatment: label 'a' when discrete,
                                    # zeros otherwise
                                    if is_discrete:
                                        T0 = np.full_like(T, 'a')
                                    else:
                                        T0 = np.zeros_like(T)
                                    eff = est.effect(X, T0=T0, T1=T)
                                    self.assertEqual(shape(eff), effect_shape)

                                    if inf is not None:
                                        const_interval = est.const_marginal_effect_interval(X)
                                        marginal_interval = est.marginal_effect_interval(T, X)
                                        self.assertEqual(shape(marginal_interval),
                                                         (2,) + marginal_effect_shape)
                                        self.assertEqual(shape(const_interval),
                                                         (2,) + const_marginal_effect_shape)
                                        effect_interval = est.effect_interval(X, T0=T0, T1=T)
                                        self.assertEqual(shape(effect_interval),
                                                         (2,) + effect_shape)

                                    est.score(Y, T, X, W)

                                    # make sure we can call effect with implied scalar
                                    # treatments, no matter the dimensions of T, and also
                                    # that we warn when there are multiple treatments
                                    # (ExitStack can be used as a "do nothing" ContextManager)
                                    cm = self.assertWarns(Warning) if d_t > 1 else ExitStack()

                                    with cm:
                                        effect_shape2 = (n if d_x else 1,) + y_axis
                                        if is_discrete:
                                            eff = est.effect(X, T0='a', T1='b')
                                        else:
                                            eff = est.effect(X)
                                        self.assertEqual(shape(eff), effect_shape2)
def transform(self, X):
    """Repeat the stored features once for every row of ``X``.

    Asserts that the featurizer has been fitted and that ``X`` has no
    columns.  The stored ``_features`` array is tiled along a new leading
    axis whose length equals the number of rows of ``X``.
    """
    assert self._is_fitted
    dims = shape(X)
    assert dims[1] == 0
    n_rows = dims[0]
    # four repetition counts: only the leading axis is actually repeated
    repetitions = (n_rows, 1, 1, 1)
    return np.tile(self._features, repetitions)
def fit(self, X):
    """Mark the featurizer as fitted and return it.

    The fitted flag is set first; afterwards ``X`` is asserted to have zero
    columns.
    """
    self._is_fitted = True
    n_columns = shape(X)[1]
    assert n_columns == 0
    return self
def transform(self, X):
    """Return a single column of ones with one entry per row of ``X``."""
    n_rows = shape(X)[0]
    ones_shape = (n_rows, 1)
    return np.ones(ones_shape)
def test_cate_api(self):
    """Check the CATE API shapes of the DRIV estimators over a grid of settings.

    For every combination of control/feature dimensions, binary or continuous
    treatment and instrument, projection flag and featurizer, each estimator
    is pickled, fitted and pickled again.  The shapes returned by the effect
    methods and their interval counterparts are then compared with the
    expected shapes, and ``score`` is called.  When ``X`` is given,
    ``cate_feature_names`` and ``shap_values`` are exercised as well.
    """
    def const_marg_eff_shape(n, d_x, binary_T):
        """Constant marginal effect shape."""
        n_rows = n if d_x else 1
        treatment_axis = (1,) if binary_T else ()
        return (n_rows,) + treatment_axis

    def marg_eff_shape(n, binary_T):
        """Marginal effect shape."""
        if binary_T:
            return (n, 1)
        return (n,)

    def eff_shape(n, d_x):
        """Effect shape."""
        return (n,) if d_x else (1,)

    def flexible_model():
        """Build a fresh intercept-free linear regression."""
        return StatsModelsLinearRegression(fit_intercept=False)

    n = 500
    y = np.random.normal(size=(n,))

    # parameter combinations to test
    grid = itertools.product(
        [None, 10],     # d_w
        [None, 3],      # d_x
        [True, False],  # binary_T
        [True, False],  # binary_Z
        [True, False],  # projection
        [None, PolynomialFeatures(degree=2, include_bias=False)],  # featurizer
    )

    for d_w, d_x, binary_T, binary_Z, projection, featurizer in grid:
        # fresh data for each combination, drawn in the order W, X, T, Z
        W = None if d_w is None else np.random.normal(size=(n, d_w))
        X = None if d_x is None else np.random.normal(size=(n, d_x))
        T = (np.random.choice(["a", "b"], size=(n,)) if binary_T
             else np.random.normal(size=(n,)))
        Z = (np.random.choice(["c", "d"], size=(n,)) if binary_Z
             else np.random.normal(size=(n,)))

        # options shared by the first four estimators
        iv_options = dict(
            projection=projection,
            discrete_instrument=binary_Z,
            discrete_treatment=binary_T,
            featurizer=featurizer,
        )
        est_list = [
            DRIV(
                flexible_model_effect=flexible_model(),
                model_final=flexible_model(),
                fit_cate_intercept=True,
                **iv_options,
            ),
            LinearDRIV(
                flexible_model_effect=flexible_model(),
                fit_cate_intercept=True,
                **iv_options,
            ),
            SparseLinearDRIV(
                flexible_model_effect=flexible_model(),
                fit_cate_intercept=True,
                **iv_options,
            ),
            ForestDRIV(
                flexible_model_effect=flexible_model(),
                **iv_options,
            ),
        ]

        if X is None:
            # without features the last entry (ForestDRIV) is left out
            est_list.pop()

        if binary_T and binary_Z:
            est_list.extend([
                IntentToTreatDRIV(
                    flexible_model_effect=flexible_model(),
                    fit_cate_intercept=True,
                    featurizer=featurizer,
                ),
                LinearIntentToTreatDRIV(
                    flexible_model_effect=flexible_model(),
                    featurizer=featurizer,
                ),
            ])

        for est in est_list:
            with self.subTest(d_w=d_w, d_x=d_x, binary_T=binary_T, binary_Z=binary_Z,
                              projection=projection, featurizer=featurizer, est=est):
                # TODO: serializing/deserializing for every combination -- is this necessary?
                # ensure we can serialize unfit estimator
                pickle.dumps(est)

                est.fit(y, T, Z=Z, X=X, W=W)

                # ensure we can serialize fit estimator
                pickle.dumps(est)

                # expected effect size
                exp_const_marginal_effect_shape = const_marg_eff_shape(n, d_x, binary_T)
                marginal_effect_shape = marg_eff_shape(n, binary_T)
                effect_shape = eff_shape(n, d_x)

                # assert calculated constant marginal effect shape is expected
                # const_marginal effect is defined in LinearCateEstimator class
                const_effect = est.const_marginal_effect(X)
                self.assertEqual(shape(const_effect), exp_const_marginal_effect_shape)

                # assert calculated marginal effect shape is expected
                marginal = est.marginal_effect(T, X)
                self.assertEqual(shape(marginal), marginal_effect_shape)

                # binary treatments are contrasted by label, continuous ones by 0 vs 1
                T0, T1 = ("a", "b") if binary_T else (0, 1)
                eff = est.effect(X, T0=T0, T1=T1)
                self.assertEqual(shape(eff), effect_shape)

                # test inference
                const_interval = est.const_marginal_effect_interval(X)
                marginal_interval = est.marginal_effect_interval(T, X)
                effect_interval = est.effect_interval(X, T0=T0, T1=T1)
                self.assertEqual(shape(const_interval),
                                 (2,) + exp_const_marginal_effect_shape)
                self.assertEqual(shape(marginal_interval), (2,) + marginal_effect_shape)
                self.assertEqual(shape(effect_interval), (2,) + effect_shape)

                # test can run score
                est.score(y, T, Z=Z, X=X, W=W)

                if X is not None:
                    # test cate_feature_names
                    if featurizer:
                        expect_feat_len = featurizer.fit(X).n_output_features_
                    else:
                        expect_feat_len = d_x
                    self.assertEqual(len(est.cate_feature_names()), expect_feat_len)

                    # test can run shap values
                    est.shap_values(X[:10])
def test_cate_api(self):
    """Check the CATE API shapes of the DRIV estimators over nested settings.

    The loops cover control/feature dimensions, binary or continuous
    treatment and instrument, the projection flag and the featurizer.  Data
    for each input is drawn once at its own loop level.  Every estimator is
    pickled, fitted and pickled again.  The shapes returned by the effect
    methods and their interval counterparts are then compared with the
    expected shapes, and ``score`` is called.  When ``X`` is given,
    ``cate_feature_names`` and ``shap_values`` are exercised as well.
    """
    def const_marg_eff_shape(n, d_x, binary_T):
        """Expected shape of the constant marginal effect."""
        n_rows = n if d_x else 1
        treatment_axis = (1,) if binary_T else ()
        return (n_rows,) + treatment_axis

    def marg_eff_shape(n, binary_T):
        """Expected shape of the marginal effect."""
        if binary_T:
            return (n, 1)
        return (n,)

    def eff_shape(n, d_x):
        """Expected shape of the effect."""
        return (n,) if d_x else (1,)

    def flexible_model():
        """Build a fresh intercept-free linear regression."""
        return StatsModelsLinearRegression(fit_intercept=False)

    n = 1000
    y = np.random.normal(size=(n,))

    for d_w in [None, 10]:
        W = None if d_w is None else np.random.normal(size=(n, d_w))
        for d_x in [None, 3]:
            X = None if d_x is None else np.random.normal(size=(n, d_x))
            for binary_T in [True, False]:
                T = (np.random.choice(["a", "b"], size=(n,)) if binary_T
                     else np.random.normal(size=(n,)))
                for binary_Z in [True, False]:
                    Z = (np.random.choice(["c", "d"], size=(n,)) if binary_Z
                         else np.random.normal(size=(n,)))
                    for projection in [True, False]:
                        for featurizer in [
                                None,
                                PolynomialFeatures(degree=2, include_bias=False)]:
                            # options shared by the first four estimators
                            iv_options = dict(
                                projection=projection,
                                discrete_instrument=binary_Z,
                                discrete_treatment=binary_T,
                                featurizer=featurizer,
                            )
                            est_list = [
                                DRIV(
                                    flexible_model_effect=flexible_model(),
                                    model_final=flexible_model(),
                                    fit_cate_intercept=True,
                                    **iv_options,
                                ),
                                LinearDRIV(
                                    flexible_model_effect=flexible_model(),
                                    fit_cate_intercept=True,
                                    **iv_options,
                                ),
                                SparseLinearDRIV(
                                    flexible_model_effect=flexible_model(),
                                    fit_cate_intercept=True,
                                    **iv_options,
                                ),
                                ForestDRIV(
                                    flexible_model_effect=flexible_model(),
                                    **iv_options,
                                ),
                            ]

                            if X is None:
                                # without features the last entry (ForestDRIV) is left out
                                est_list.pop()

                            if binary_T and binary_Z:
                                est_list.extend([
                                    IntentToTreatDRIV(
                                        flexible_model_effect=flexible_model(),
                                        fit_cate_intercept=True,
                                        featurizer=featurizer,
                                    ),
                                    LinearIntentToTreatDRIV(
                                        flexible_model_effect=flexible_model(),
                                        featurizer=featurizer,
                                    ),
                                ])

                            for est in est_list:
                                with self.subTest(d_w=d_w, d_x=d_x, binary_T=binary_T,
                                                  binary_Z=binary_Z, projection=projection,
                                                  featurizer=featurizer, est=est):
                                    # ensure we can serialize unfit estimator
                                    pickle.dumps(est)

                                    est.fit(y, T, Z=Z, X=X, W=W)

                                    # ensure we can serialize fit estimator
                                    pickle.dumps(est)

                                    # expected effect size
                                    const_marginal_effect_shape = const_marg_eff_shape(
                                        n, d_x, binary_T)
                                    marginal_effect_shape = marg_eff_shape(n, binary_T)
                                    effect_shape = eff_shape(n, d_x)

                                    # test effect
                                    const_effect = est.const_marginal_effect(X)
                                    self.assertEqual(shape(const_effect),
                                                     const_marginal_effect_shape)
                                    marginal = est.marginal_effect(T, X)
                                    self.assertEqual(shape(marginal),
                                                     marginal_effect_shape)

                                    # binary treatments are contrasted by label,
                                    # continuous ones by 0 vs 1
                                    T0, T1 = ("a", "b") if binary_T else (0, 1)
                                    eff = est.effect(X, T0=T0, T1=T1)
                                    self.assertEqual(shape(eff), effect_shape)

                                    # test inference
                                    const_interval = est.const_marginal_effect_interval(X)
                                    marginal_interval = est.marginal_effect_interval(T, X)
                                    effect_interval = est.effect_interval(X, T0=T0, T1=T1)
                                    self.assertEqual(shape(const_interval),
                                                     (2,) + const_marginal_effect_shape)
                                    self.assertEqual(shape(marginal_interval),
                                                     (2,) + marginal_effect_shape)
                                    self.assertEqual(shape(effect_interval),
                                                     (2,) + effect_shape)

                                    # test can run score
                                    est.score(y, T, Z=Z, X=X, W=W)

                                    if X is not None:
                                        # test cate_feature_names
                                        if featurizer:
                                            fitted = featurizer.fit(X)
                                            expect_feat_len = fitted.n_output_features_
                                        else:
                                            expect_feat_len = d_x
                                        self.assertEqual(len(est.cate_feature_names()),
                                                         expect_feat_len)

                                        # test can run shap values
                                        est.shap_values(X[:10])