示例#1
0
    def null_refutation_test(self, data=None, dataset="linear", beta=10,
            num_common_causes=1, num_instruments=1, num_samples=100000,
            treatment_is_binary=True):
        """Check that the configured refuter runs and is a no-op at zero strength.

        Two refutations are run with ``self.refuter_method``:

        1. with the configured confounder effect strengths, only to verify
           that no exception is raised;
        2. with both effect strengths forced to zero, where the refuted
           effect must match the original estimate within the relative
           tolerance ``self._error_tolerance``.

        Parameters
        ----------
        data: dict, optional
            A dowhy dataset object; when None, a linear dataset is generated
            from the remaining parameters.
        dataset: str
            Requested dataset family (currently unused; a linear dataset is
            always generated when ``data`` is None).
        beta: float
            True causal effect used when generating the dataset.
        num_common_causes, num_instruments, num_samples: int
            Shape of the generated dataset.
        treatment_is_binary: bool
            Whether the generated treatment is binary.
        """
        # Supports user-provided dataset object
        if data is None:
            data = dowhy.datasets.linear_dataset(
                beta=beta,
                num_common_causes=num_common_causes,
                num_instruments=num_instruments,
                num_samples=num_samples,
                treatment_is_binary=treatment_is_binary)

        model = CausalModel(
            data=data['df'],
            treatment=data["treatment_name"],
            outcome=data["outcome_name"],
            graph=data["gml_graph"],
            proceed_when_unidentifiable=True,
            test_significance=None
        )
        target_estimand = model.identify_effect()
        ate_estimate = model.estimate_effect(
            identified_estimand=target_estimand,
            method_name=self.estimator_method,
            test_significance=None
        )
        true_ate = data["ate"]
        self.logger.debug(true_ate)

        # To test if there are any exceptions
        ref = model.refute_estimate(
            target_estimand, ate_estimate,
            method_name=self.refuter_method,
            confounders_effect_on_treatment=self.confounders_effect_on_t,
            confounders_effect_on_outcome=self.confounders_effect_on_y,
            effect_strength_on_treatment=self.effect_strength_on_t,
            effect_strength_on_outcome=self.effect_strength_on_y)
        self.logger.debug(ref.new_effect)

        # To test if the estimate is identical if refutation parameters are zero
        refute = model.refute_estimate(
            target_estimand, ate_estimate,
            method_name=self.refuter_method,
            confounders_effect_on_treatment=self.confounders_effect_on_t,
            confounders_effect_on_outcome=self.confounders_effect_on_y,
            effect_strength_on_treatment=0,
            effect_strength_on_outcome=0)
        error = abs(refute.new_effect - ate_estimate.value)

        print("Error in refuted estimate = {0} with tolerance {1}%. Estimated={2},After Refutation={3}".format(
            error, self._error_tolerance * 100, ate_estimate.value, refute.new_effect)
        )
        # Relative check: the zero-strength refutation must reproduce the estimate.
        assert error < abs(ate_estimate.value) * self._error_tolerance
示例#2
0
    def predict(self, dataset: DatasetInterface):
        """Estimate and refute the causal effect on the dataset's outcome.

        Adds a constant ``'treatment'`` column to the dataset's frame, fits a
        linear-regression backdoor estimate, refutes it with a random common
        cause, and returns ``(estimated_effect, new_effect)``.
        """
        frame = dataset.get_data()

        # Temporarily attach a treatment indicator (every row treated).
        frame['treatment'] = True

        causal_model = CausalModel(frame,
                                   'treatment',
                                   dataset.get_outcome(),
                                   common_causes=dataset.get_causes(),
                                   proceed_when_unidentifiable=True)

        # Identification -> estimation -> refutation pipeline.
        estimand = causal_model.identify_effect()
        effect_estimate = causal_model.estimate_effect(
            estimand,
            method_name="backdoor.linear_regression",
            test_significance=True)
        refutation = causal_model.refute_estimate(
            estimand,
            effect_estimate,
            method_name="random_common_cause")

        return refutation.estimated_effect, refutation.new_effect
示例#3
0
    def predict_tutorial(self, data: pd.DataFrame):
        """Reproduce the IHDP causal-inference tutorial end to end.

        Downloads the IHDP dataset, estimates the treatment effect on the
        factual outcome via backdoor linear regression, and refutes the
        estimate with a random common cause, printing each intermediate result.

        NOTE(review): the ``data`` argument is immediately replaced by the CSV
        downloaded from GitHub, so the caller-supplied frame is ignored —
        confirm this is intentional.

        https://towardsdatascience.com/implementing-causal-inference-a-key-step-towards-agi-de2cde8ea599
        """
        data = pd.read_csv(
            'https://raw.githubusercontent.com/AMLab-Amsterdam/CEVAE/master/datasets/IHDP/csv/ihdp_npci_1.csv',
            header=None)

        # IHDP columns: treatment flag, factual/counterfactual outcomes,
        # noiseless potential outcomes (mu0, mu1), then 25 covariates x1..x25.
        covariates = ['x' + str(i) for i in range(1, 26)]
        data.columns = [
            'treatment',
            'y_factual',
            'y_cfactual',
            'mu0',
            'mu1',
        ] + covariates
        data = data.astype({'treatment': 'bool'}, copy=False)

        # Create a causal model from the data and given common causes.
        # Fixed: the original built the cause list via string concatenation
        # followed by split('+'), which appended a spurious empty-string cause.
        model = CausalModel(data=data,
                            treatment='treatment',
                            outcome='y_factual',
                            common_causes=covariates)

        # Identify the causal effect
        identified_estimand = model.identify_effect()
        print(identified_estimand)

        # Estimate the causal effect and compare it with Average Treatment Effect
        estimate = model.estimate_effect(
            identified_estimand,
            method_name="backdoor.linear_regression",
            test_significance=True)
        print(estimate)
        print("Causal Estimate is " + str(estimate.value))

        refute_results = model.refute_estimate(
            identified_estimand, estimate, method_name="random_common_cause")
        print(refute_results)
示例#4
0
    def predict_example(self, data: pd.DataFrame):
        """Identify, estimate, and refute the effect of ['E1', 'E2'] on 'E3'.

        Uses propensity-score matching for estimation and a random common
        cause for refutation, returning the refutation result (the original
        discarded it; returning it is backward-compatible for callers that
        ignored the previous implicit None).

        References:
        https://github.com/Microsoft/dowhy
        https://ntanmayee.github.io/articles/2018/11/16/tools-for-causality.html
        """
        causes = ['E1', 'E2']
        outcome = 'E3'

        model = CausalModel(data=data,
                            treatment=causes,
                            outcome=outcome,
                            proceed_when_unidentifiable=True)

        # Identify causal effect and return target estimands
        identified_estimand = model.identify_effect()

        # Estimate the target estimand using a statistical method.
        estimate = model.estimate_effect(
            identified_estimand,
            method_name="backdoor.propensity_score_matching")

        # Refute the obtained estimate using multiple robustness checks.
        return model.refute_estimate(
            identified_estimand, estimate, method_name="random_common_cause")
示例#5
0
    def null_refutation_test(self,
                             data=None,
                             dataset="linear",
                             beta=10,
                             num_common_causes=1,
                             num_instruments=1,
                             num_samples=100000,
                             treatment_is_binary=True):
        """Run the refuter selected by ``self.refuter_method`` and validate it.

        A linear dataset is generated (unless ``data`` is supplied), an ATE is
        estimated with ``self.estimator_method``, and then one of the dowhy
        refuters is exercised:

        - "add_unobserved_common_cause": with zero effect strengths the
          refuted effect must match the estimate within the relative
          tolerance ``self._error_tolerance``;
        - "placebo_treatment_refuter": the refuted effect must be within an
          absolute tolerance of zero;
        - "data_subset_refuter" / "bootstrap_refuter": the refuted effect
          must match the estimate within the relative tolerance;
        - "dummy_outcome_refuter": the refuted effect is compared to zero.

        Parameters
        ----------
        data: dict, optional
            A dowhy dataset object; generated when None.
        dataset: str
            Requested dataset family (currently unused; a linear dataset is
            always generated when ``data`` is None).
        beta: float
            True causal effect used for dataset generation.
        num_common_causes, num_instruments, num_samples: int
            Shape of the generated dataset.
        treatment_is_binary: bool
            Whether the generated treatment is binary.
        """
        # Supports user-provided dataset object
        if data is None:
            data = dowhy.datasets.linear_dataset(
                beta=beta,
                num_common_causes=num_common_causes,
                num_instruments=num_instruments,
                num_samples=num_samples,
                treatment_is_binary=treatment_is_binary)

        print(data['df'])

        print("")
        model = CausalModel(data=data['df'],
                            treatment=data["treatment_name"],
                            outcome=data["outcome_name"],
                            graph=data["gml_graph"],
                            proceed_when_unidentifiable=True,
                            test_significance=None)
        target_estimand = model.identify_effect()
        ate_estimate = model.estimate_effect(
            identified_estimand=target_estimand,
            method_name=self.estimator_method,
            test_significance=None)
        true_ate = data["ate"]
        self.logger.debug(true_ate)

        if self.refuter_method == "add_unobserved_common_cause":
            # To test if there are any exceptions
            ref = model.refute_estimate(
                target_estimand,
                ate_estimate,
                method_name=self.refuter_method,
                confounders_effect_on_treatment=self.confounders_effect_on_t,
                confounders_effect_on_outcome=self.confounders_effect_on_y,
                effect_strength_on_treatment=self.effect_strength_on_t,
                effect_strength_on_outcome=self.effect_strength_on_y)
            self.logger.debug(ref.new_effect)

            # To test if the estimate is identical if refutation parameters are zero
            refute = model.refute_estimate(
                target_estimand,
                ate_estimate,
                method_name=self.refuter_method,
                confounders_effect_on_treatment=self.confounders_effect_on_t,
                confounders_effect_on_outcome=self.confounders_effect_on_y,
                effect_strength_on_treatment=0,
                effect_strength_on_outcome=0)
            error = abs(refute.new_effect - ate_estimate.value)

            print(
                "Error in refuted estimate = {0} with tolerance {1}%. Estimated={2},After Refutation={3}"
                .format(error, self._error_tolerance * 100, ate_estimate.value,
                        refute.new_effect))
            # Relative check: zero-strength refutation must reproduce the estimate.
            assert error < abs(ate_estimate.value) * self._error_tolerance

        elif self.refuter_method == "placebo_treatment_refuter":
            if treatment_is_binary is True:
                ref = model.refute_estimate(target_estimand,
                                            ate_estimate,
                                            method_name=self.refuter_method,
                                            num_simulations=10)
            else:
                ref = model.refute_estimate(target_estimand,
                                            ate_estimate,
                                            method_name=self.refuter_method)
            # This value is hardcoded to be zero as we are running this on a
            # linear dataset: a placebo (random) treatment should have no effect.
            EXPECTED_PLACEBO_VALUE = 0

            error = abs(ref.new_effect - EXPECTED_PLACEBO_VALUE)

            print(
                "Error in the refuted estimate = {0} with tolerance {1}%. Expected Value={2}, After Refutation={3}"
                .format(error, self._error_tolerance * 100,
                        EXPECTED_PLACEBO_VALUE, ref.new_effect))

            print(ref)

            # Absolute tolerance here since the expected value is exactly zero.
            assert error < self._error_tolerance

        elif self.refuter_method == "data_subset_refuter":
            if treatment_is_binary is True:
                ref = model.refute_estimate(target_estimand,
                                            ate_estimate,
                                            method_name=self.refuter_method,
                                            num_simulations=5)
            else:
                ref = model.refute_estimate(target_estimand,
                                            ate_estimate,
                                            method_name=self.refuter_method)

            error = abs(ref.new_effect - ate_estimate.value)

            print(
                "Error in the refuted estimate = {0} with tolerance {1}%. Estimated={2}, After Refutation={3}"
                .format(error, self._error_tolerance * 100, ate_estimate.value,
                        ref.new_effect))

            print(ref)

            # Re-estimating on a random subset should be close to the original.
            assert error < abs(ate_estimate.value) * self._error_tolerance

        elif self.refuter_method == "bootstrap_refuter":
            if treatment_is_binary is True:
                ref = model.refute_estimate(target_estimand,
                                            ate_estimate,
                                            method_name=self.refuter_method,
                                            num_simulations=5)
            else:
                ref = model.refute_estimate(target_estimand,
                                            ate_estimate,
                                            method_name=self.refuter_method)

            error = abs(ref.new_effect - ate_estimate.value)

            print(
                "Error in the refuted estimate = {0} with tolerance {1}%. Estimated={2}, After Refutation={3}"
                .format(error, self._error_tolerance * 100, ate_estimate.value,
                        ref.new_effect))

            print(ref)

            # Bootstrap resampling should reproduce the original estimate.
            assert error < abs(ate_estimate.value) * self._error_tolerance

        elif self.refuter_method == "dummy_outcome_refuter":
            if self.transformations is None:
                ref = model.refute_estimate(target_estimand,
                                            ate_estimate,
                                            method_name=self.refuter_method,
                                            num_simulations=2)
            else:
                ref = model.refute_estimate(
                    target_estimand,
                    ate_estimate,
                    method_name=self.refuter_method,
                    transformations=self.transformations,
                    params=self.params,
                    num_simulations=2)

            # This value is hardcoded to be zero as we are running this on a
            # linear dataset. Ordinarily, we should expect this value to be zero.
            EXPECTED_DUMMY_OUTCOME_VALUE = 0

            # Fixed: this validation used to be nested inside the ``else``
            # branch above, so the default (no-transformations) path was
            # never checked at all.
            error = abs(ref.new_effect - EXPECTED_DUMMY_OUTCOME_VALUE)

            print(
                "Error in the refuted estimate = {0} with tolerance {1}%. Expected Value={2}, After Refutation={3}"
                .format(error, self._error_tolerance * 100,
                        EXPECTED_DUMMY_OUTCOME_VALUE, ref.new_effect))

            print(ref)

            assert ref
示例#6
0
class DoWhyWrapper:
    """
    A wrapper class to allow user call other methods from dowhy package through EconML.
    (e.g. causal graph, refutation test, etc.)

    Parameters
    ----------
    cate_estimator: instance
        An instance of any CATE estimator we currently support
    """

    def __init__(self, cate_estimator):
        self._cate_estimator = cate_estimator

    def _get_params(self):
        """Return the sorted constructor-parameter names of the wrapped estimator.

        Raises
        ------
        RuntimeError
            If the estimator's ``__init__`` uses varargs or varkwargs, since
            its parameters can then not be enumerated by introspection.
        """
        init = self._cate_estimator.__init__
        # introspect the constructor arguments to find the model parameters
        # to represent
        init_signature = inspect.signature(init)
        parameters = init_signature.parameters.values()
        for p in parameters:
            if p.kind == p.VAR_POSITIONAL or p.kind == p.VAR_KEYWORD:
                raise RuntimeError("cate estimators should always specify their parameters in the signature "
                                   "of their __init__ (no varargs, no varkwargs). "
                                   f"{self._cate_estimator} with constructor {init_signature} doesn't "
                                   "follow this convention.")
        # Extract and sort argument names excluding 'self'
        return sorted([p.name for p in parameters])

    def fit(self, Y, T, X=None, W=None, Z=None, *, outcome_names=None, treatment_names=None, feature_names=None,
            confounder_names=None, instrument_names=None, graph=None, estimand_type="nonparametric-ate",
            proceed_when_unidentifiable=True, missing_nodes_as_confounders=False,
            control_value=0, treatment_value=1, target_units="ate", **kwargs):
        """
        Estimate the counterfactual model from data through dowhy package.

        Parameters
        ----------
        Y: vector of length n
            Outcomes for each sample
        T: vector of length n
            Treatments for each sample
        X: optional (n, d_x) matrix (Default=None)
            Features for each sample
        W: optional (n, d_w) matrix (Default=None)
            Controls for each sample
        Z: optional (n, d_z) matrix (Default=None)
            Instruments for each sample
        outcome_names: optional list (Default=None)
            Name of the outcome
        treatment_names: optional list (Default=None)
            Name of the treatment
        feature_names: optional list (Default=None)
            Name of the features
        confounder_names: optional list (Default=None)
            Name of the confounders
        instrument_names: optional list (Default=None)
            Name of the instruments
        graph: optional
            Path to DOT file containing a DAG or a string containing a DAG specification in DOT format
        estimand_type: optional string
            Type of estimand requested (currently only "nonparametric-ate" is supported).
            In the future, may support other specific parametric forms of identification
        proceed_when_unidentifiable: optional bool (Default=True)
            Whether the identification should proceed by ignoring potential unobserved confounders
        missing_nodes_as_confounders: optional bool (Default=False)
            Whether variables in the dataframe that are not included in the causal graph should be automatically
            included as confounder nodes
        control_value: optional scalar (Default=0)
            Value of the treatment in the control group, for effect estimation
        treatment_value: optional scalar (Default=1)
            Value of the treatment in the treated group, for effect estimation
        target_units: optional (Default="ate")
            The units for which the treatment effect should be estimated.
            This can be of three types:

            1. A string for common specifications of target units (namely, "ate", "att" and "atc"),
            2. A lambda function that can be used as an index for the data (pandas DataFrame),
            3. A new DataFrame that contains values of the effect_modifiers and effect will be estimated
               only for this new data

        kwargs: optional
            Other keyword arguments from fit method for CATE estimator

        Returns
        -------
        self
        """

        Y, T, X, W, Z = check_input_arrays(Y, T, X, W, Z)

        # create dataframe
        n_obs = Y.shape[0]
        Y, T, X, W, Z = reshape_arrays_2dim(n_obs, Y, T, X, W, Z)

        # currently dowhy only support single outcome and single treatment
        assert Y.shape[1] == 1, "Can only accept single dimensional outcome."
        assert T.shape[1] == 1, "Can only accept single dimensional treatment."

        # generate default column names for any name list the caller omitted
        if outcome_names is None:
            outcome_names = [f"Y{i}" for i in range(Y.shape[1])]
        if treatment_names is None:
            treatment_names = [f"T{i}" for i in range(T.shape[1])]
        if feature_names is None:
            feature_names = [f"X{i}" for i in range(X.shape[1])]
        if confounder_names is None:
            confounder_names = [f"W{i}" for i in range(W.shape[1])]
        if instrument_names is None:
            instrument_names = [f"Z{i}" for i in range(Z.shape[1])]
        column_names = outcome_names + treatment_names + feature_names + confounder_names + instrument_names
        df = pd.DataFrame(np.hstack((Y, T, X, W, Z)), columns=column_names)
        self.dowhy_ = CausalModel(
            data=df,
            treatment=treatment_names,
            outcome=outcome_names,
            graph=graph,
            common_causes=feature_names + confounder_names if X.shape[1] > 0 or W.shape[1] > 0 else None,
            instruments=instrument_names if Z.shape[1] > 0 else None,
            effect_modifiers=feature_names if X.shape[1] > 0 else None,
            estimand_type=estimand_type,
            # Fixed: keyword was misspelled ("proceed_when_unidetifiable"), so
            # the user's setting never reached CausalModel.
            proceed_when_unidentifiable=proceed_when_unidentifiable,
            missing_nodes_as_confounders=missing_nodes_as_confounders
        )
        # Fixed: previously hardcoded to True, ignoring the user's argument.
        self.identified_estimand_ = self.dowhy_.identify_effect(
            proceed_when_unidentifiable=proceed_when_unidentifiable)
        # dowhy resolves this dotted name back to the wrapped estimator class
        method_name = "backdoor." + self._cate_estimator.__module__ + "." + self._cate_estimator.__class__.__name__
        init_params = {}
        for p in self._get_params():
            init_params[p] = getattr(self._cate_estimator, p)
        self.estimate_ = self.dowhy_.estimate_effect(self.identified_estimand_,
                                                     method_name=method_name,
                                                     control_value=control_value,
                                                     treatment_value=treatment_value,
                                                     target_units=target_units,
                                                     method_params={
                                                         "init_params": init_params,
                                                         "fit_params": kwargs,
                                                     },
                                                     )
        return self

    def refute_estimate(self, *, method_name, **kwargs):
        """
        Refute an estimated causal effect.

        If method_name is provided, uses the provided method. In the future, we may support automatic
        selection of suitable refutation tests.
        Following refutation methods are supported:

        - Adding a randomly-generated confounder: "random_common_cause"
        - Adding a confounder that is associated with both treatment and outcome: "add_unobserved_common_cause"
        - Replacing the treatment with a placebo (random) variable): "placebo_treatment_refuter"
        - Removing a random subset of the data: "data_subset_refuter"

        For more details, see docs :mod:`dowhy.causal_refuters`

        Parameters
        ----------
        method_name: string
            Name of the refutation method
        kwargs: optional
            Additional arguments that are passed directly to the refutation method.
            Can specify a random seed here to ensure reproducible results ('random_seed' parameter).
            For method-specific parameters, consult the documentation for the specific method.
            All refutation methods are in the causal_refuters subpackage.

        Returns
        -------
        RefuteResult: an instance of the RefuteResult class
        """
        return self.dowhy_.refute_estimate(
            self.identified_estimand_, self.estimate_, method_name=method_name, **kwargs
        )

    # We don't allow user to call refit_final from this class, since internally dowhy effect estimate will only update
    # cate estimator but not the effect.
    def refit_final(self, inference=None):
        raise AttributeError(
            "Method refit_final is not allowed through a dowhy object; please perform a full fit instead.")

    def __getattr__(self, attr):
        # don't proxy special methods
        if attr.startswith('__'):
            raise AttributeError(attr)
        elif attr in ['_cate_estimator', 'dowhy_',
                      'identified_estimand_', 'estimate_']:
            # reaching here means the attribute genuinely isn't set yet
            # (e.g. fit() has not been called); let the lookup fail
            return super().__getattr__(attr)
        elif attr.startswith('dowhy__'):
            # 'dowhy__' prefix explicitly targets the wrapped CausalModel
            return getattr(self.dowhy_, attr[len('dowhy__'):])
        elif hasattr(self.estimate_._estimator_object, attr):
            if hasattr(self.dowhy_, attr):
                warnings.warn("This call is ambiguous, "
                              "we're defaulting to CATE estimator's attribute. "
                              "Please add 'dowhy__' as prefix if you want to get dowhy attribute.", UserWarning)
            return getattr(self.estimate_._estimator_object, attr)
        else:
            return getattr(self.dowhy_, attr)

    def __setattr__(self, attr, value):
        if attr in ['_cate_estimator', 'dowhy_',
                    'identified_estimand_', 'estimate_']:
            super().__setattr__(attr, value)
        elif attr.startswith('dowhy__'):
            # 'dowhy__' prefix explicitly targets the wrapped CausalModel
            setattr(self.dowhy_, attr[len('dowhy__'):], value)
        elif hasattr(self.estimate_._estimator_object, attr):
            if hasattr(self.dowhy_, attr):
                warnings.warn("This call is ambiguous, "
                              "we're defaulting to CATE estimator's attribute. "
                              "Please add 'dowhy__' as prefix if you want to set dowhy attribute.", UserWarning)
            setattr(self.estimate_._estimator_object, attr, value)
        else:
            setattr(self.dowhy_, attr, value)
    data=data,
    treatment='treatment',
    outcome='y_factual',
    common_causes=xs.split('+'),
)
# Save the causal graph rendered by dowhy as a PNG and show it inline
# (NOTE(review): `display`/`Image` presumably come from IPython, and `model`
# is a CausalModel constructed on earlier lines not shown here — confirm).
model.view_model()
display(Image(filename="causal_model.png"))

# Identify the causal effect
identified_estimand = model.identify_effect()
print(identified_estimand)

# Estimate the causal effect and compare it with Average Treatment Effect
estimate = model.estimate_effect(identified_estimand,
                                 method_name="backdoor.linear_regression",
                                 test_significance=True)

print(estimate)

print("Causal Estimate is " + str(estimate.value))
# Naive ATE baseline: difference of mean factual outcomes between the
# treated and control rows, to compare against the regression estimate.
data_1 = data[data["treatment"] == 1]
data_0 = data[data["treatment"] == 0]

print("ATE", np.mean(data_1["y_factual"]) - np.mean(data_0["y_factual"]))

# Robustness check: add a random common cause and re-estimate; the effect
# should be essentially unchanged if the estimate is not spurious.
refute_results = model.refute_estimate(identified_estimand,
                                       estimate,
                                       method_name="random_common_cause")
print(refute_results)