def test_2sls(self):
    """Smoke-test the NP-2SLS estimator with Hermite featurizers of several degrees.

    Fits on data where the true structural effect is p * x, then reports the
    out-of-sample squared loss of the estimated effect and a few marginal effects.
    """
    n = 50000
    # eps confounds both the feature x and the outcome; z is the instrument
    eps = np.random.uniform(low=-0.5, high=0.5, size=(n, 1))
    inst = np.random.uniform(size=(n, 1))
    feat = np.random.uniform(size=(n, 1)) + eps
    treat = feat + inst * eps + np.random.uniform(size=(n, 1))
    outcome = treat * feat + eps

    losses = []
    marg_effs = []

    # fresh draws for out-of-sample evaluation
    inst_fresh = np.random.uniform(size=(n, 1))
    eps_fresh = np.random.uniform(low=-0.5, high=0.5, size=(n, 1))
    feat_fresh = np.random.uniform(size=(n, 1)) + eps_fresh
    treat_fresh = feat_fresh + inst_fresh * eps_fresh + np.random.uniform(size=(n, 1))

    degree_grid = [(0, 0, 0), (1, 1, 1), (5, 5, 5), (10, 10, 10), (3, 3, 10), (10, 10, 3)]
    for dt, dx, dz in degree_grid:
        np2sls = NonparametricTwoStageLeastSquares(
            HermiteFeatures(dt),
            HermiteFeatures(dx),
            HermiteFeatures(dz),
            HermiteFeatures(dt, shift=1))
        np2sls.fit(outcome, treat, feat, inst)
        # effect of moving treatment from 0 to treat_fresh at feat_fresh
        effect = np2sls.effect(feat_fresh, np.zeros(shape(treat_fresh)), treat_fresh)
        # true effect is treat * feat, so measure squared recovery error
        losses.append(np.mean(np.square(treat_fresh * feat_fresh - effect)))
        marg_effs.append(
            np2sls.marginal_effect(np.array([[0.3], [0.5], [0.7]]),
                                   np.array([[0.4], [0.6], [0.2]])))
    print("losses: {}".format(losses))
    print("marg_effs: {}".format(marg_effs))
def test_marg_eff(self):
    """Check that effect and marginal_effect exactly recover a noiseless quadratic model."""
    X = np.random.normal(size=(5000, 2))
    Z = np.random.normal(size=(5000, 2))
    W = np.random.normal(size=(5000, 1))
    # Note: no noise, just testing that we can exactly recover when we ought to be able to
    t_col0 = np.cross(X, Z).reshape(-1, 1) + W
    t_col1 = (np.prod(X, axis=1) + np.prod(Z, axis=1)).reshape(-1, 1)
    T = np.hstack([t_col0, t_col1])
    Y = X * T + X ** 2

    # all three featurizers share the same configuration
    poly_args = dict(degree=2, interaction_only=False, include_bias=True)
    est = NonparametricTwoStageLeastSquares(
        t_featurizer=PolynomialFeatures(**poly_args),
        x_featurizer=PolynomialFeatures(**poly_args),
        z_featurizer=PolynomialFeatures(**poly_args),
        dt_featurizer=DPolynomialFeatures(**poly_args))
    est.fit(Y, T, X, W, Z)

    # pick some arbitrary X
    X_test = np.array([[0.3, 0.7], [0.2, 0.1]])
    # effect = (X * 1 + X^2) - (X * 0 + X^2) = X
    np.testing.assert_almost_equal(est.effect(X_test), X_test)

    # pick some arbitrary T
    T_test = np.array([[-0.3, 0.1], [0.6, -1.2]])
    marg_eff = est.marginal_effect(T_test, X_test)
    # marg effect_{i,j} = X_i if i=j, 0 otherwise
    expected = np.zeros((X_test.shape[0], Y.shape[1], T.shape[1]))
    diag = range(X.shape[1])
    expected[:, diag, diag] = X_test[:, :]
    np.testing.assert_almost_equal(marg_eff, expected)
def test_internal_options(self):
    """Test that the internal use of bootstrap within an estimator using custom options works."""
    x = np.random.normal(size=(1000, 2))
    z = np.random.normal(size=(1000, 1))
    t = np.random.normal(size=(1000, 1))
    t2 = np.random.normal(size=(1000, 1))
    # BUGFIX: use x[:, 0:1] (shape (1000, 1)) instead of x[:, 0] (shape (1000,)) —
    # the 1-D slice broadcasts against t's (1000, 1) into a (1000, 1000) outcome array
    y = x[:, 0:1] * 0.5 + t + np.random.normal(size=(1000, 1))
    opts = BootstrapOptions(50, 2)
    est = NonparametricTwoStageLeastSquares(PolynomialFeatures(2), PolynomialFeatures(2),
                                            PolynomialFeatures(2), None, inference=opts)
    est.fit(y, t, x, None, z)
    # test that we can get an interval for the same attribute for the bootstrap as the original,
    # with the same shape for the lower and upper bounds
    lower, upper = est.effect_interval(x, t, t2)
    for bound in [lower, upper]:
        self.assertEqual(np.shape(est.effect(x, t, t2)), np.shape(bound))
    self.assertFalse(np.allclose(lower, upper))
    # test that we can do the same thing once we provide percentile bounds
    lower, upper = est.effect_interval(x, t, t2, lower=10, upper=90)
    for bound in [lower, upper]:
        self.assertEqual(np.shape(est.effect(x, t, t2)), np.shape(bound))
    self.assertFalse(np.allclose(lower, upper))
def test_2sls_shape(self):
    """Verify that effect and marginal_effect return arrays of the expected shapes
    across combinations of treatment/outcome/feature/instrument dimensions."""
    n = 100

    def rand_array(dim):
        # dim == -1 encodes a 1-D vector (n,); otherwise a 2-D (n, dim) array
        return np.random.normal(size=(n, dim) if dim >= 0 else (n,))

    for d_t in [-1, 1, 2]:
        n_t = d_t if d_t > 0 else 1
        for d_y in [-1, 1, 2]:
            for d_x in [1, 5]:
                for d_z in [1, 2]:
                    d_w = 1
                    if d_z < n_t:
                        # skip under-identified cases (fewer instruments than treatments)
                        continue
                    T, Y, X, Z, W = (rand_array(d) for d in (d_t, d_y, d_x, d_z, d_w))
                    est = NonparametricTwoStageLeastSquares(
                        t_featurizer=PolynomialFeatures(),
                        x_featurizer=PolynomialFeatures(),
                        z_featurizer=PolynomialFeatures(),
                        dt_featurizer=DPolynomialFeatures())
                    est.fit(Y, T, X=X, W=W, Z=Z)
                    eff = est.effect(X)
                    marg_eff = est.marginal_effect(T, X)
                    # vector (d == -1) inputs drop the corresponding output dimension
                    effect_shape = (n,) + ((d_y,) if d_y > 0 else ())
                    marginal_effect_shape = ((n if d_x else 1,) +
                                             ((d_y,) if d_y > 0 else ()) +
                                             ((d_t,) if d_t > 0 else ()))
                    self.assertEqual(shape(marg_eff), marginal_effect_shape)
                    self.assertEqual(shape(eff), effect_shape)
def test_internal_options(self):
    """Test that the internal use of bootstrap within an estimator using custom options works."""
    x = np.random.normal(size=(1000, 2))
    z = np.random.normal(size=(1000, 1))
    t = np.random.normal(size=(1000, 1))
    t2 = np.random.normal(size=(1000, 1))
    y = x[:, 0:1] * 0.5 + t + np.random.normal(size=(1000, 1))

    est = NonparametricTwoStageLeastSquares(PolynomialFeatures(2), PolynomialFeatures(2),
                                            PolynomialFeatures(2), None)
    est.fit(y, t, x, None, z, inference=BootstrapInference(50, 2))

    eff = est.effect(x, t, t2)

    def check_interval(lb, ub, coverage):
        # bounds must share the point estimate's shape
        self.assertEqual(np.shape(eff), np.shape(lb))
        self.assertEqual(np.shape(eff), np.shape(ub))
        # bounds must be ordered and must differ somewhere
        assert (lb <= ub).all()
        assert (lb < ub).any()
        # the point estimate should usually fall inside the interval
        assert np.mean(np.logical_and(lb <= eff, eff <= ub)) >= coverage

    # default interval
    check_interval(*est.effect_interval(x, T0=t, T1=t2), coverage=0.7)
    # same checks once we provide an explicit alpha
    check_interval(*est.effect_interval(x, T0=t, T1=t2, alpha=0.2), coverage=0.65)
def test_2sls(self):
    """Smoke-test NP-2SLS with controls W and Hermite featurizers of several degrees.

    The true structural effect is p * x; reports out-of-sample squared loss of the
    estimated effect and a few marginal effects for each featurizer configuration.
    """
    n = 50000
    d_w = 2
    d_z = 1
    d_x = 1
    d_t = 1
    d_y = 1
    # e confounds both x and the outcome; z is the instrument; w are controls
    e = np.random.uniform(low=-0.5, high=0.5, size=(n, d_x))
    # CONSISTENCY FIX: use d_z (was a hard-coded 1), matching z_fresh below
    z = np.random.uniform(size=(n, d_z))
    w = np.random.uniform(size=(n, d_w))
    a = np.random.normal(size=(d_w, d_t))
    b = np.random.normal(size=(d_w, d_y))
    x = np.random.uniform(size=(n, d_x)) + e
    p = x + z * e + w @ a + np.random.uniform(size=(n, d_t))
    y = p * x + e + w @ b
    losses = []
    marg_effs = []
    # fresh draws for out-of-sample evaluation
    # (removed the unused w_fresh draw — effect() below does not take W)
    z_fresh = np.random.uniform(size=(n, d_z))
    e_fresh = np.random.uniform(low=-0.5, high=0.5, size=(n, d_x))
    x_fresh = np.random.uniform(size=(n, d_x)) + e_fresh
    p_fresh = x_fresh + z_fresh * e_fresh + np.random.uniform(size=(n, d_t))
    for (dt, dx, dz) in [(0, 0, 0), (1, 1, 1), (5, 5, 5), (10, 10, 10), (3, 3, 10), (10, 10, 3)]:
        np2sls = NonparametricTwoStageLeastSquares(t_featurizer=HermiteFeatures(dt),
                                                   x_featurizer=HermiteFeatures(dx),
                                                   z_featurizer=HermiteFeatures(dz),
                                                   dt_featurizer=HermiteFeatures(dt, shift=1))
        np2sls.fit(y, p, X=x, W=w, Z=z)
        # effect of moving treatment from 0 to p_fresh at x_fresh
        effect = np2sls.effect(x_fresh, np.zeros(shape(p_fresh)), p_fresh)
        # true effect is p * x, so measure squared recovery error
        losses.append(np.mean(np.square(p_fresh * x_fresh - effect)))
        marg_effs.append(np2sls.marginal_effect(np.array([[0.3], [0.5], [0.7]]),
                                                np.array([[0.4], [0.6], [0.2]])))
    print("losses: {}".format(losses))
    print("marg_effs: {}".format(marg_effs))