Example No. 1
    def _nuisance_estimates(self, y, T, X, Z):
        n_samples = y.shape[0]
        prel_theta = np.zeros(n_samples)
        res_t = np.zeros(n_samples)
        res_y = np.zeros(n_samples)
        delta = np.zeros(n_samples)

        splits = self._get_split_enum(y, T, X, Z)
        for idx, (train, test) in enumerate(splits):
            # Estimate preliminary theta in cross-fitting manner
            prel_theta[test] = self.prel_model_effect[idx].fit(
                y[train], T[train], X[train],
                Z[train]).effect(X[test]).flatten()
            # Fit h(X, Z) = E[T | X, Z] in cross-fitting manner
            self.model_T_XZ[idx].fit(
                hstack([X[train], Z[train].reshape(-1, 1)]), T[train])
            Z_one = np.ones((Z[test].shape[0], 1))
            Z_zero = np.zeros((Z[test].shape[0], 1))
            pr_t_test_one = self.model_T_XZ[idx].predict(
                hstack([X[test], Z_one]))
            pr_t_test_zero = self.model_T_XZ[idx].predict(
                hstack([X[test], Z_zero]))
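            # For a balanced binary instrument, delta(X) = (h(X, 1) - h(X, 0)) / 2
            # and p(X) = E[T | X] = (h(X, 1) + h(X, 0)) / 2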
            delta[test] = (pr_t_test_one - pr_t_test_zero) / 2
            pr_t_test = (pr_t_test_one + pr_t_test_zero) / 2
            res_t[test] = T[test] - pr_t_test
            # Estimate residual Y_res = Y - q(X) = Y - E[Y | X] in cross-fitting manner
            res_y[test] = y[test] - \
                self.model_Y_X[idx].fit(X[train], y[train]).predict(X[test])

        return prel_theta, res_t, res_y, 2 * Z - 1, delta
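The averaging of the two counterfactual predictions above relies on the
instrument being balanced: when Z ~ Bernoulli(1/2) independently of X,
p(X) = E[T | X] = (h(X, 1) + h(X, 0)) / 2. A minimal numeric check of that
identity (the data-generating process and LinearRegression are illustrative
assumptions, not part of the original code):

import numpy as np
from sklearn.linear_model import LinearRegression

rng = np.random.default_rng(0)
n = 10000
X = rng.normal(size=(n, 1))
Z = rng.integers(0, 2, size=n)
T = 0.5 * X[:, 0] + Z + rng.normal(size=n)   # E[T | X] = 0.5 * X + 0.5

h = LinearRegression().fit(np.hstack([X, Z.reshape(-1, 1)]), T)
p_hat = (h.predict(np.hstack([X, np.ones((n, 1))])) +
         h.predict(np.hstack([X, np.zeros((n, 1))]))) / 2
print(np.max(np.abs(p_hat - (0.5 * X[:, 0] + 0.5))))  # small, shrinks with n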
Example No. 2
    def _generate_recoverable_errors(a_X,
                                     X,
                                     a_W=None,
                                     W=None,
                                     featurizer=FunctionTransformer()):
        """Return error vectors e_t and e_y such that OLS can recover the true coefficients from both stages."""
        if W is None:
            W = np.empty((shape(X)[0], 0))
        if a_W is None:
            a_W = np.zeros((shape(W)[1], ))
        # to correctly recover coefficients for T via OLS, we need e_t to be orthogonal to [W;X]
        WX = hstack([W, X])
        e_t = rand_sol(WX.T, np.zeros((shape(WX)[1], )))

        # to correctly recover coefficients for Y via OLS, we need ([X; W]⊗[1; ϕ(X); W])⁺ e_y =
        #                                                          -([X; W]⊗[1; ϕ(X); W])⁺ ((ϕ(X)⊗e_t)a_X+(W⊗e_t)a_W)
        # then, to correctly recover a in the third stage, we additionally need (ϕ(X)⊗e_t)ᵀ e_y = 0

        ϕ = featurizer.fit_transform(X)

        v_X = cross_product(ϕ, e_t)
        v_W = cross_product(W, e_t)

        M = np.linalg.pinv(
            cross_product(WX, hstack([np.ones((shape(WX)[0], 1)), ϕ, W])))
        e_y = rand_sol(
            vstack([M, v_X.T]),
            vstack([-M @ (v_X @ a_X + v_W @ a_W),
                    np.zeros((shape(v_X)[1], ))]))

        return e_t, e_y
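rand_sol and cross_product are helpers not shown in this listing. From the
call sites above, rand_sol(A, b) evidently returns a random solution x of
A @ x = b. A minimal sketch of such a helper, under that assumption:

import numpy as np

def rand_sol(A, b):
    """Assumed reconstruction: return a random solution x of A @ x = b."""
    A = np.asarray(A)
    A_pinv = np.linalg.pinv(A)
    x0 = A_pinv @ b                               # minimum-norm particular solution
    null_proj = np.eye(A.shape[1]) - A_pinv @ A   # projector onto null(A)
    return x0 + null_proj @ np.random.normal(size=A.shape[1])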
Example No. 3
    def fit(self, y, T, X, Z):
        """
        Parameters
        ----------
        y : outcome
        T : treatment (single dimensional)
        X : features/controls
        Z : instrument
        """
        if len(T.shape) > 1 and T.shape[1] > 1:
            raise AssertionError(
                "Can only accept single dimensional treatment")
        if len(y.shape) > 1 and y.shape[1] > 1:
            raise AssertionError("Can only accept single dimensional outcome")
        if len(Z.shape) == 1:
            Z = Z.reshape(-1, 1)
        if (Z.shape[1] > 1) and self.binary_instrument:
            raise AssertionError(
                "Binary instrument flag is True, but instrument is multi-dimensional"
            )
        T = T.flatten()
        y = y.flatten()

        n_samples = y.shape[0]
        pred_t = np.zeros(n_samples)
        proj_t = np.zeros(n_samples)
        res_y = np.zeros(n_samples)

        if self.n_splits == 1:
            splits = [(np.arange(X.shape[0]), np.arange(X.shape[0]))]
        # TODO. Deal with multi-class instrument
        elif self.binary_instrument or self.binary_treatment:
            group = (2 * T * self.binary_treatment +
                     Z.flatten() * self.binary_instrument)
            splits = StratifiedKFold(n_splits=self.n_splits,
                                     shuffle=True).split(X, group)
        else:
            splits = KFold(n_splits=self.n_splits, shuffle=True).split(X)

        for idx, (train, test) in enumerate(splits):
            # Calculate nuisances
            pred_t[test] = self.model_T_X[idx].fit(X[train],
                                                   T[train]).predict(X[test])
            proj_t[test] = self.model_T_XZ[idx].fit(
                hstack([X[train], Z[train]]),
                T[train]).predict(hstack([X[test], Z[test]]))
            res_y[test] = y[test] - \
                self.model_Y_X[idx].fit(X[train], y[train]).predict(X[test])

        # Form residuals and estimate theta = E[Y_res * Z_res] / E[T_res * Z_res]
        res_z = proj_t - pred_t
        res_t = T - pred_t
        self._effect = np.mean(res_y * res_z) / np.mean(res_t * res_z)

        self._std = np.std(res_y * res_z) / (np.sqrt(res_y.shape[0]) *
                                             np.abs(np.mean(res_t * res_z)))

        return self
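Since _effect is the point estimate and _std its standard error, a
normal-approximation confidence interval follows directly. A small
hypothetical helper (the function name and the scipy dependency are
assumptions):

from scipy.stats import norm

def ate_conf_int(est, alpha=0.05):
    """Normal-approximation CI; est is a fitted instance of the class above."""
    z = norm.ppf(1 - alpha / 2)
    return est._effect - z * est._std, est._effect + z * est._std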
Example No. 4
    def effect(self, X, T0=0, T1=1):
        """
        Parameters
        ----------
        X : features
        """
        if not hasattr(T0, "__len__"):
            T0 = np.ones(X.shape[0]) * T0
        if not hasattr(T1, "__len__"):
            T1 = np.ones(X.shape[0]) * T1

        X0 = hstack([T0.reshape(-1, 1), X])
        X1 = hstack([T1.reshape(-1, 1), X])
        return self.model_final.predict(X1) - self.model_final.predict(X0)
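A minimal standalone sketch of the same computation with a plain sklearn
final model (the data-generating process is made up for illustration):

import numpy as np
from sklearn.linear_model import LinearRegression

rng = np.random.default_rng(3)
X = rng.normal(size=(100, 2))
T = rng.integers(0, 2, size=100).astype(float)
y = 2.0 * T + X @ np.array([1.0, -1.0]) + rng.normal(size=100)
# the final model regresses y on [T, X], as assumed by effect() above
model_final = LinearRegression().fit(np.hstack([T.reshape(-1, 1), X]), y)

# effect(X, T0=0, T1=1): difference of the two counterfactual predictions
eff = (model_final.predict(np.hstack([np.ones((100, 1)), X])) -
       model_final.predict(np.hstack([np.zeros((100, 1)), X])))
print(eff.mean())  # close to the true constant effect of 2.0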
Example No. 5
    def transform(self, X):
        # add a constant column of ones to X
        X = hstack([np.ones((shape(X)[0], 1)), X])
        d_x = shape(X)[1]
        d_y, d_t = self._d_y, self._d_t
        # for each row, create d_y*d_t*d_x features (d_x now includes the
        # constant column), each of which is a d_y by d_t matrix
        return reshape(np.einsum('nx,fyt->nfxyt', X, self._fts),
                       (shape(X)[0], d_y * d_t * d_x, d_y, d_t))
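self._fts is not shown in this listing; for the einsum signature to work it
must have shape (d_y*d_t, d_y, d_t), e.g. the elementary basis matrices. A
shape check under that assumption:

import numpy as np

d_y, d_t = 2, 3
_fts = np.eye(d_y * d_t).reshape(d_y * d_t, d_y, d_t)  # assumed basis matrices

X = np.random.normal(size=(5, 4))
X1 = np.hstack([np.ones((5, 1)), X])                   # constant column, d_x = 5
out = np.einsum('nx,fyt->nfxyt', X1, _fts).reshape(
    X1.shape[0], d_y * d_t * X1.shape[1], d_y, d_t)
print(out.shape)  # (5, 30, 2, 3)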
Example No. 6
    def _nuisance_estimates(self, y, T, X, Z):

        n_samples = y.shape[0]
        prel_theta = np.zeros(n_samples)
        res_t = np.zeros(n_samples)
        res_y = np.zeros(n_samples)
        res_z = np.zeros(n_samples)
        cov = np.zeros(n_samples)
        proj_t = np.zeros(n_samples)

        splits = self._get_split_enum(y, T, X, Z)

        # TODO. The solution below is not strictly valid cross-fitting:
        # the test data are used to create proj_t on the train folds, which
        # in the second train-test loop is used to create the cov nuisance
        # on the test data. Hence the T variable of some samples is
        # implicitly correlated with their cov nuisance through this flow
        # of information. However, this correlation should be rather weak.
        # A more rigorous approach would be an internal nested CV loop for
        # the T_XZ model.
        splits, splits_one = tee(splits)
        # Estimate h(X, Z) = E[T | X, Z] in cross-fitting manner
        for idx, (train, test) in enumerate(splits_one):
            self.model_T_XZ[idx].fit(hstack([X[train], Z[train]]), T[train])
            proj_t[test] = self.model_T_XZ[idx].predict(
                hstack([X[test], Z[test]]))

        for idx, (train, test) in enumerate(splits):
            # Estimate preliminary theta in cross-fitting manner
            prel_theta[test] = self.prel_model_effect[idx].fit(
                y[train], T[train], X[train],
                Z[train]).effect(X[test]).flatten()
            # Estimate p(X) = E[T | X] in cross-fitting manner
            self.model_T_X[idx].fit(X[train], T[train])
            pr_t_test = self.model_T_X[idx].predict(X[test])
            # Calculate residuals T_res = T - p(X) and Z_res = h(Z, X) - p(X)
            res_t[test] = T[test] - pr_t_test
            res_z[test] = proj_t[test] - pr_t_test
            # Estimate residual Y_res = Y - q(X) = Y - E[Y | X] in cross-fitting manner
            res_y[test] = y[test] - \
                self.model_Y_X[idx].fit(X[train], y[train]).predict(X[test])
            # Estimate cov[T, E[T|X,Z] | X] = E[T * E[T|X,Z] | X] - E[T|X]^2
            cov[test] = self.model_TZ_X[idx].fit(
                X[train], T[train] * proj_t[train]).predict(
                    X[test]) - pr_t_test**2

        return prel_theta, res_t, res_y, res_z, cov
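The cov nuisance relies on the tower-property identity
cov[T, E[T|X,Z] | X] = E[T * E[T|X,Z] | X] - E[T|X]^2, which holds because
E[E[T|X,Z] | X] = E[T|X]. A quick Monte Carlo check of the unconditional
version (the synthetic distributions are assumptions for illustration):

import numpy as np

rng = np.random.default_rng(4)
n = 200000
Z = rng.integers(0, 2, size=n)
T = Z + rng.normal(size=n)
h = Z.astype(float)                  # E[T | Z] = Z here
lhs = np.cov(T, h)[0, 1]
rhs = np.mean(T * h) - np.mean(T) ** 2
print(lhs, rhs)                      # both approximately Var(h) = 0.25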
Example No. 7
    def fit(self, y, T, X, Z):
        """
        Parameters
        ----------
        y : outcome
        T : treatment (single dimensional)
        X : features/controls
        Z : instrument
        """
        if len(T.shape) > 1 and T.shape[1] > 1:
            raise AssertionError(
                "Can only accept single dimensional treatment")
        if len(y.shape) > 1 and y.shape[1] > 1:
            raise AssertionError("Can only accept single dimensional outcome")
        if len(Z.shape) == 1:
            Z = Z.reshape(-1, 1)
        T = T.flatten()
        y = y.flatten()

        pred_t = self.model_T_XZ.fit(hstack([X, Z]), T).predict(hstack([X, Z]))
        self.model_final.fit(hstack([pred_t.reshape(-1, 1), X]), y)

        return self
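This fit is essentially the projection form of two-stage least squares:
project T onto [X, Z], then regress y on [T_hat, X]. A standalone sketch
with plain sklearn models (the data-generating process is assumed):

import numpy as np
from sklearn.linear_model import LinearRegression

rng = np.random.default_rng(2)
n = 5000
X = rng.normal(size=(n, 1))
Z = rng.normal(size=(n, 1))
U = rng.normal(size=n)                        # unobserved confounder
T = Z[:, 0] + U + rng.normal(size=n)
y = 1.5 * T + X[:, 0] + 2 * U + rng.normal(size=n)

pred_t = LinearRegression().fit(np.hstack([X, Z]), T).predict(np.hstack([X, Z]))
final = LinearRegression().fit(np.hstack([pred_t.reshape(-1, 1), X]), y)
print(final.coef_[0])  # approximately 1.5, despite confounding through U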
Example No. 8
    def _test_sparse(n_p, d_w, n_r):
        # need at least as many rows in e_y as there are distinct columns
        # in [X;X⊗W;W⊗W;X⊗e_t] to find a solution for e_t
        assert n_p * n_r >= 2 * n_p + n_p * d_w + d_w * (d_w + 1) / 2
        a = np.random.normal(size=(n_p,))  # one effect per product
        n = n_p * n_r
        p = np.tile(range(n_p), n_r)  # product id

        b = np.random.normal(size=(d_w + n_p,))
        g = np.random.normal(size=(d_w + n_p,))

        x = np.empty((2 * n, n_p))  # product dummies
        w = np.empty((2 * n, d_w))
        y = np.empty(2 * n)
        t = np.empty(2 * n)

        for fold in range(0, 2):
            x_f = OneHotEncoder().fit_transform(np.reshape(p, (-1, 1))).toarray()
            w_f = np.random.normal(size=(n, d_w))
            xw_f = hstack([x_f, w_f])
            e_t_f, e_y_f = TestDML._generate_recoverable_errors(a, x_f, W=w_f)

            t_f = xw_f @ b + e_t_f
            y_f = t_f * np.choose(p, a) + xw_f @ g + e_y_f

            x[fold * n:(fold + 1) * n, :] = x_f
            w[fold * n:(fold + 1) * n, :] = w_f
            y[fold * n:(fold + 1) * n] = y_f
            t[fold * n:(fold + 1) * n] = t_f

        dml = SparseLinearDMLCateEstimator(LinearRegression(fit_intercept=False), LinearRegression(
            fit_intercept=False), featurizer=FunctionTransformer())
        dml.fit(y, t, x, w)

        # note that this would fail for the non-sparse DMLCateEstimator

        np.testing.assert_allclose(a, dml.coef_.reshape(-1))
        eff = reshape(t * np.choose(np.tile(p, 2), a), (-1, 1))
        np.testing.assert_allclose(eff, dml.effect(0, t, x))

        dml = SparseLinearDMLCateEstimator(LinearRegression(fit_intercept=False),
                                           LinearRegression(fit_intercept=False),
                                           featurizer=Pipeline([("id", FunctionTransformer()),
                                                                ("matrix", MatrixFeatures(1, 1))]))
        dml.fit(y, t, x, w)
        np.testing.assert_allclose(eff, dml.effect(0, t, x))
Example No. 9
    def fit(self, y, T, X, Z, store_final=False):
        """
        Parameters
        ----------
        y : outcome
        T : treatment (single dimensional)
        X : features/controls
        Z : instrument (single dimensional)
        store_final : bool
            Whether to store the estimated nuisance values, so that a
            different final stage model can be fit without refitting the
            nuisances. Increases memory usage.
        """
        if len(T.shape) > 1 and T.shape[1] > 1:
            raise AssertionError(
                "Can only accept single dimensional treatment")
        if len(y.shape) > 1 and y.shape[1] > 1:
            raise AssertionError("Can only accept single dimensional outcome")
        if len(Z.shape) == 1:
            Z = Z.reshape(-1, 1)
        if (Z.shape[1] > 1) and self.binary_instrument:
            raise AssertionError(
                "Binary instrument flag is True, but instrument is multi-dimensional")

        T = T.flatten()
        y = y.flatten()

        n_samples = y.shape[0]
        proj_t = np.zeros(n_samples)
        pred_t = np.zeros(n_samples)
        res_y = np.zeros(n_samples)

        if self.n_splits == 1:
            splits = [(np.arange(X.shape[0]), np.arange(X.shape[0]))]
        # TODO. Deal with multi-class instrument/treatment
        elif self.binary_instrument or self.binary_treatment:
            group = (2 * T * self.binary_treatment +
                     Z.flatten() * self.binary_instrument)
            splits = StratifiedKFold(
                n_splits=self.n_splits, shuffle=True).split(X, group)
        else:
            splits = KFold(n_splits=self.n_splits, shuffle=True).split(X)

        for idx, (train, test) in enumerate(splits):
            # Estimate h(Z, X) = E[T | Z, X] in cross-fitting manner
            proj_t[test] = self.model_T_XZ[idx].fit(hstack([X[train], Z[train]]),
                                                    T[train]).predict(hstack([X[test],
                                                                              Z[test]]))
            # Estimate residual Y_res = Y - q(X) = Y - E[Y | X] in cross-fitting manner
            res_y[test] = y[test] - \
                self.model_Y_X[idx].fit(X[train], y[train]).predict(X[test])
            # Estimate p(X) = E[T | X] in cross-fitting manner
            pred_t[test] = self.model_T_X[idx].fit(
                X[train], T[train]).predict(X[test])

        # Estimate theta by minimizing the square loss (Y_res - theta(X) * (h(Z, X) - p(X)))^2
        self.model_effect.fit(res_y, (proj_t - pred_t).reshape(-1, 1), X)

        if store_final:
            self.stored_final_data = True
            self.X = X
            self.res_t = (proj_t - pred_t).reshape(-1, 1)
            self.res_y = res_y

        return self
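With store_final=True, a different final stage can later be fit from the
stored residuals without recomputing the nuisances. A hypothetical helper
sketching that (the method name and wiring are assumptions):

    def refit_final(self, model_effect):
        """Hypothetical: reuse stored nuisances to fit a new final stage."""
        if not getattr(self, 'stored_final_data', False):
            raise AttributeError("Call fit(..., store_final=True) first")
        self.model_effect = model_effect
        self.model_effect.fit(self.res_y, self.res_t, self.X)
        return self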