示例#1
0
文件: base.py 项目: zihanmok/dowhy
 def custom_data_average_treatment_effect_test(self, data):
     """Check that the estimator recovers the known ATE of a supplied dataset.

     :param data: dict providing ``"df"``, ``"treatment_name"``,
         ``"outcome_name"``, ``"gml_graph"`` and ``"ate"`` (the true effect
         the estimate is compared against).
     :raises AssertionError: if the absolute estimation error is not below
         ``abs(true_ate) * self._error_tolerance``.
     """
     model = CausalModel(
         data=data['df'],
         treatment=data["treatment_name"],
         outcome=data["outcome_name"],
         graph=data["gml_graph"],
         proceed_when_unidentifiable=True,
         test_significance=None
     )
     target_estimand = model.identify_effect()
     estimator_ate = self._Estimator(
         data['df'],
         identified_estimand=target_estimand,
         treatment=data["treatment_name"],
         outcome=data["outcome_name"],
         test_significance=None
     )
     true_ate = data["ate"]
     ate_estimate = estimator_ate.estimate_effect()
     # Compare magnitudes: with a signed error every under-estimate would pass,
     # and with a signed bound every check would fail for a negative true ATE.
     error = abs(ate_estimate.value - true_ate)
     print("Error in ATE estimate = {0} with tolerance {1}%. Estimated={2},True={3}".format(
         error, self._error_tolerance * 100, ate_estimate.value, true_ate)
     )
     assert error < abs(true_ate) * self._error_tolerance
示例#2
0
    def predict(self, dataset: DatasetInterface):
        """Estimate a linear-regression backdoor effect and refute it.

        A constant ``'treatment'`` column (all ``True``) is written into the
        frame returned by ``dataset.get_data()`` and used as the treatment.

        :param dataset: object providing ``get_data()``, ``get_outcome()``
            and ``get_causes()``.
        :returns: tuple ``(estimated_effect, new_effect)`` taken from the
            ``random_common_cause`` refutation result.
        """
        frame = dataset.get_data()

        # Temporarily add a treatment column.
        treatment_name = 'treatment'
        frame[treatment_name] = True

        outcome_name = dataset.get_outcome()
        confounders = dataset.get_causes()

        causal_model = CausalModel(
            frame,
            treatment_name,
            outcome_name,
            common_causes=confounders,
            proceed_when_unidentifiable=True,
        )

        # Identify, then estimate, then refute.
        estimand = causal_model.identify_effect()
        effect = causal_model.estimate_effect(
            estimand,
            method_name="backdoor.linear_regression",
            test_significance=True,
        )
        refutation = causal_model.refute_estimate(
            estimand,
            effect,
            method_name="random_common_cause",
        )

        return refutation.estimated_effect, refutation.new_effect
示例#3
0
 def test_graph_input(self, beta, num_instruments, num_samples, num_treatments):
     """Build a CausalModel from an inline GML string and check its common causes.

     The dataset is generated with five common causes, while the hand-written
     graph only declares X0, X1 and X2 (plus the instrument Z0). X1 and X2
     must be reported by ``get_common_causes``.
     """
     data = dowhy.datasets.linear_dataset(
         beta=beta,
         num_common_causes=5,
         num_instruments=num_instruments,
         num_samples=num_samples,
         num_treatments=num_treatments,
         treatment_is_binary=True,
     )
     shared_kwargs = dict(
         data=data['df'],
         treatment=data["treatment_name"],
         outcome=data["outcome_name"],
         proceed_when_unidentifiable=True,
         test_significance=None,
     )

     # First construction uses the graph shipped with the generated dataset.
     model = CausalModel(graph=data["gml_graph"], **shared_kwargs)

     # Second construction uses a graph with two common causes removed.
     gml_str = 'graph[directed 1 node[ id "{0}" label "{0}"]node[ id "{1}" label "{1}"]node[ id "Unobserved Confounders" label "Unobserved Confounders"]edge[source "{0}" target "{1}"]edge[source "Unobserved Confounders" target "{0}"]edge[source "Unobserved Confounders" target "{1}"]node[ id "X0" label "X0"] edge[ source "X0" target "{0}"] node[ id "X1" label "X1"] edge[ source "X1" target "{0}"] node[ id "X2" label "X2"] edge[ source "X2" target "{0}"] edge[ source "X0" target "{1}"] edge[ source "X1" target "{1}"] edge[ source "X2" target "{1}"] node[ id "Z0" label "Z0"] edge[ source "Z0" target "{0}"]]'.format(data["treatment_name"][0], data["outcome_name"])
     print(gml_str)
     model = CausalModel(
         graph=gml_str,
         missing_nodes_as_confounders=True,
         **shared_kwargs
     )

     reported = model.get_common_causes()
     for expected in ["X1", "X2"]:
         assert expected in reported
示例#4
0
    def test_causalml_XGBTRegressor(self, init_data):
        """Smoke-test effect estimation through the CausalML XGBTRegressor learner.

        No assertion is made on the estimate; it is only printed.
        """
        dataset = init_data

        # Model described by the graph shipped with the dataset.
        causal_model = CausalModel(
            data=dataset['df'],
            treatment=dataset['treatment_name'],
            outcome=dataset['outcome_name'],
            effect_modifiers=dataset['effect_modifier_names'],
            graph=dataset['gml_graph'],
        )
        estimand = causal_model.identify_effect(proceed_when_unidentifiable=True)

        estimator_name = "backdoor.causalml.inference.meta.XGBTRegressor"
        estimator_params = {"init_params": {}}
        xgbt_estimate = causal_model.estimate_effect(
            estimand,
            method_name=estimator_name,
            method_params=estimator_params,
        )

        print("The XGBT estimate obtained:")
        print(xgbt_estimate)
示例#5
0
    def test_causalml_RLearner(self, init_data):
        """Smoke-test effect estimation through the CausalML BaseRRegressor learner.

        A ``ValueError`` raised during estimation is reported with a printed
        message and the estimate stays ``None``; nothing is asserted.
        """
        dataset = init_data

        # Model described by the graph shipped with the dataset.
        causal_model = CausalModel(
            data=dataset['df'],
            treatment=dataset['treatment_name'],
            outcome=dataset['outcome_name'],
            effect_modifiers=dataset['effect_modifier_names'],
            graph=dataset['gml_graph'],
        )
        estimand = causal_model.identify_effect(proceed_when_unidentifiable=True)

        rl_estimate = None
        try:
            # The learner is built inside the try so its errors are handled too.
            rl_estimate = causal_model.estimate_effect(
                estimand,
                method_name="backdoor.causalml.inference.meta.BaseRRegressor",
                method_params={"init_params": {'learner': XGBRegressor()}},
            )
        except ValueError:
            print("Error with respect to the number of samples")

        print("The R Learner estimate obtained:")
        print(rl_estimate)
示例#6
0
    def test_5(self):
        """ID algorithm on T->Y with X1 causing both T and Y, and X2 causing only T.

        Inspects the internal structure of the identified estimand: the first
        product term must sum over X1 and contain two factors whose outcome
        and condition variable sets are checked below.
        """
        treatment = "T"
        outcome = "Y"
        variables = ["X1", "X2"]
        causal_graph = "digraph{T->Y;X1->T;X1->Y;X2->T;}"
        # Build the column list explicitly: list("T") would split a string into
        # characters and only works by accident for one-letter names.
        columns = [treatment, outcome] + variables
        df = pd.DataFrame(columns=columns)

        # Only identification is exercised, so an empty frame is sufficient.
        causal_model = CausalModel(df, treatment, outcome, graph=causal_graph)
        identified_estimand = causal_model.identify_effect(
            method_name="id-algorithm")

        # Compare with ground truth
        summed_term = identified_estimand._product[0]
        first_factor = summed_term._product[0]._product[0]
        second_factor = summed_term._product[1]._product[0]
        set_a = set(first_factor['outcome_vars']._set)
        set_b = set(first_factor['condition_vars']._set)
        set_c = set(second_factor['outcome_vars']._set)
        set_d = set(second_factor['condition_vars']._set)

        assert summed_term._sum == ['X1']
        # These are "no unexpected variable" checks, as in the original test.
        assert len(set_a.difference({'Y'})) == 0
        assert len(set_b.difference({'X1', 'X2', 'T'})) == 0
        assert len(set_c.difference({'X1'})) == 0
        assert len(set_d) == 0
示例#7
0
    def test_graph_input4(self, beta, num_instruments, num_samples, num_treatments):
        """Build a CausalModel with the graph given as a path string and check its nodes.

        X1 and X2 must be reported as common causes, all listed nodes must be
        present when unobserved nodes are included, and
        "Unobserved Confounders" must be absent when they are excluded.
        """
        data = dowhy.datasets.linear_dataset(
            beta=beta,
            num_common_causes=5,
            num_instruments=num_instruments,
            num_samples=num_samples,
            num_treatments=num_treatments,
            treatment_is_binary=True,
        )
        shared_kwargs = dict(
            data=data['df'],
            treatment=data["treatment_name"],
            outcome=data["outcome_name"],
            proceed_when_unidentifiable=True,
            test_significance=None,
        )

        # First construction uses the graph shipped with the generated dataset.
        model = CausalModel(graph=data["gml_graph"], **shared_kwargs)

        # Second construction points at a graph with two common causes removed.
        gml_str = "tests/sample_dag.txt"
        print(gml_str)
        model = CausalModel(
            graph=gml_str,
            missing_nodes_as_confounders=True,
            **shared_kwargs
        )

        reported = model.get_common_causes()
        for expected in ["X1", "X2"]:
            assert expected in reported

        with_unobserved = model._graph.get_all_nodes(include_unobserved=True)
        for expected in ["Unobserved Confounders", "X0", "X1", "X2", "Z0", "v0", "y"]:
            assert expected in with_unobserved

        observed_only = model._graph.get_all_nodes(include_unobserved=False)
        assert "Unobserved Confounders" not in observed_only
示例#8
0
 def average_treatment_effect_test_continuous(self,
                                              dataset="linear",
                                              beta=1,
                                              num_common_causes=3,
                                              num_instruments=2,
                                              num_samples=100000,
                                              treatment_is_binary=False):
     """Check the estimator's ATE against the true ATE of a generated linear dataset.

     ``dataset`` is accepted but not read by this method. The assertion
     passes when the absolute error is below
     ``abs(true_ate) * self._error_tolerance``.
     """
     generated = dowhy.datasets.linear_dataset(
         beta=beta,
         num_common_causes=num_common_causes,
         num_instruments=num_instruments,
         num_samples=num_samples,
         treatment_is_binary=treatment_is_binary,
     )
     causal_model = CausalModel(
         data=generated['df'],
         treatment=generated["treatment_name"],
         outcome=generated["outcome_name"],
         graph=generated["gml_graph"],
         proceed_when_unidentifiable=True,
         test_significance=None,
     )
     estimand = causal_model.identify_effect()
     estimator = self._Estimator(
         generated['df'],
         identified_estimand=estimand,
         treatment=generated["treatment_name"],
         outcome=generated["outcome_name"],
         test_significance=None,
     )

     expected_ate = generated["ate"]
     estimated = estimator.estimate_effect()
     abs_error = abs(estimated.value - expected_ate)

     message = "Error in ATE estimate = {0} with tolerance {1}%. Estimated={2},True={3}"
     print(message.format(abs_error, self._error_tolerance * 100,
                          estimated.value, expected_ate))

     within_tolerance = abs_error < abs(expected_ate) * self._error_tolerance
     assert within_tolerance
示例#9
0
    def test_causalml_MLPTRegressor(self, init_data):
        """Smoke-test effect estimation through the CausalML MLPTRegressor learner.

        No assertion is made on the estimate; it is only printed.
        """
        dataset = init_data

        # Model described by the graph shipped with the dataset.
        causal_model = CausalModel(
            data=dataset['df'],
            treatment=dataset['treatment_name'],
            outcome=dataset['outcome_name'],
            effect_modifiers=dataset['effect_modifier_names'],
            graph=dataset['gml_graph'],
        )
        estimand = causal_model.identify_effect(proceed_when_unidentifiable=True)

        # Constructor arguments forwarded to the learner via "init_params".
        learner_settings = {
            'hidden_layer_sizes': (10, 10),
            'learning_rate_init': 0.1,
            'early_stopping': True,
        }
        mlpt_estimate = causal_model.estimate_effect(
            estimand,
            method_name="backdoor.causalml.inference.meta.MLPTRegressor",
            method_params={"init_params": learner_settings},
        )

        print("The MLPT estimate obtained:")
        print(mlpt_estimate)
示例#10
0
def simulate_dag_violations(
    methods,  # estimators to use
    beta,  # true treatment effect
    num_w_affected,  # number of common causes affected
    effect_on_w,  # effect of U on common causes
    num_z_affected,  # number of instruments affected
    effect_on_z,  # effect of U on instruments
    num_t_affected,  # number of treatments affected
    effect_on_t,  # effect of U on treatment
    effect_on_y,  # effect of U on outcomes
    times,  # number of simulation runs
):
    """Repeatedly simulate data and collect the estimate of every method.

    Each run draws a dataset from ``modified_linear_dataset`` (5 common
    causes, 2 instruments, 10000 samples, binary treatment), builds a
    ``CausalModel`` from it and evaluates every entry of ``methods``.

    :param methods: sequence of indexable items whose element 0 is the
        ``method_name`` and element 1 the ``method_params`` passed to
        ``estimate_effect``.
    :param times: number of simulation runs; ``0`` yields an empty list.
    :returns: flat list of ``(estimate_value, method_name)`` tuples, with
        ``len(methods)`` entries per run, in run order.
    """
    output = []
    for _ in range(times):
        # beta, num_common_causes, num_instruments, num_samples, etc. are as in the tutorial
        data = modified_linear_dataset(
            beta=beta,
            # u -> common causes
            num_w_affected=num_w_affected,
            effect_on_w=effect_on_w,
            # u -> instruments
            num_z_affected=num_z_affected,
            effect_on_z=effect_on_z,
            # u -> treatment
            num_t_affected=num_t_affected,
            effect_on_t=effect_on_t,
            # u -> outcome
            effect_on_y=effect_on_y,
            num_common_causes=5,
            num_instruments=2,
            num_samples=10000,
            treatment_is_binary=True,
        )

        model = CausalModel(
            data=data["df"],
            treatment=data["treatment_name"],
            outcome=data["outcome_name"],
            graph=data["gml_graph"],
            instruments=data["instrument_names"],
            proceed_when_unidentifiable=True,
        )

        identified_estimand = model.identify_effect()

        # All estimates of a run are computed before any is recorded.
        estimates = [
            model.estimate_effect(
                identified_estimand, method_name=spec[0], method_params=spec[1]
            ).value
            for spec in methods
        ]

        # Extend in place instead of rebuilding the whole list on every run.
        output.extend(zip(estimates, [spec[0] for spec in methods]))

    return output
示例#11
0
    def test_external_estimator(self, beta, num_samples, num_treatments):
        """Check that method_params reach an estimator referenced by dotted path.

        The propensity model handed over through ``method_params`` must be the
        one exposed on ``estimate.estimator`` (verified via ``max_iter``).
        """
        generated = dowhy.datasets.linear_dataset(
            beta=beta,
            num_common_causes=5,
            num_samples=num_samples,
            num_treatments=num_treatments,
            treatment_is_binary=True,
        )

        causal_model = CausalModel(
            data=generated["df"],
            treatment=generated["treatment_name"],
            outcome=generated["outcome_name"],
            graph=generated["gml_graph"],
            proceed_when_unidentifiable=True,
            test_significance=None,
        )
        estimand = causal_model.identify_effect(proceed_when_unidentifiable=True)

        estimator_path = (
            "backdoor.tests.causal_estimators.mock_external_estimator"
            ".PropensityScoreWeightingEstimator"
        )
        propensity_model = linear_model.LogisticRegression(max_iter=1000)
        estimate = causal_model.estimate_effect(
            estimand,
            method_name=estimator_path,
            control_value=0,
            treatment_value=1,
            target_units="ate",  # condition used for CATE
            confidence_intervals=True,
            method_params={"propensity_score_model": propensity_model},
        )

        assert estimate.estimator.propensity_score_model.max_iter == 1000
示例#12
0
 def test_graph_input3(self, beta, num_instruments, num_samples, num_treatments):
     """Build a CausalModel from an inline ``dag { ... }`` string and check its nodes.

     X1 and X2 must be reported as common causes, all listed nodes must be
     present when unobserved nodes are included, and
     "Unobserved Confounders" must be absent when they are excluded.
     """
     data = dowhy.datasets.linear_dataset(
         beta=beta,
         num_common_causes=5,
         num_instruments=num_instruments,
         num_samples=num_samples,
         num_treatments=num_treatments,
         treatment_is_binary=True,
     )
     shared_kwargs = dict(
         data=data['df'],
         treatment=data["treatment_name"],
         outcome=data["outcome_name"],
         proceed_when_unidentifiable=True,
         test_significance=None,
     )

     # First construction uses the graph shipped with the generated dataset.
     model = CausalModel(graph=data["gml_graph"], **shared_kwargs)

     # Second construction uses a graph with two common causes removed.
     gml_str = """dag {
     "Unobserved Confounders" [pos="0.491,-1.056"]
     X0 [pos="-2.109,0.057"]
     X1 [adjusted, pos="-0.453,-1.562"]
     X2 [pos="-2.268,-1.210"]
     Z0 [pos="-1.918,-1.735"]
     v0 [latent, pos="-1.525,-1.293"]
     y [outcome, pos="-1.164,-0.116"]
     "Unobserved Confounders" -> v0
     "Unobserved Confounders" -> y
     X0 -> v0
     X0 -> y
     X1 -> v0
     X1 -> y
     X2 -> v0
     X2 -> y
     Z0 -> v0
     v0 -> y
     }
     """
     print(gml_str)
     model = CausalModel(
         graph=gml_str,
         missing_nodes_as_confounders=True,
         **shared_kwargs
     )

     reported = model.get_common_causes()
     for expected in ["X1", "X2"]:
         assert expected in reported

     with_unobserved = model._graph.get_all_nodes(include_unobserved=True)
     for expected in ["Unobserved Confounders", "X0", "X1", "X2", "Z0", "v0", "y"]:
         assert expected in with_unobserved

     observed_only = model._graph.get_all_nodes(include_unobserved=False)
     assert "Unobserved Confounders" not in observed_only
示例#13
0
 def test_graph_refutation(self, num_variables,num_samples):
     """Refute the generating graph of a random-graph dataset and expect success."""
     generated = dowhy.datasets.dataset_from_random_graph(
         num_vars=num_variables,
         num_samples=num_samples,
     )
     causal_model = CausalModel(
         data=generated["df"],
         treatment=generated["treatment_name"],
         outcome=generated["outcome_name"],
         graph=generated["gml_graph"],
     )

     # One independence test per variable kind.
     independence_tests = {
         'test_for_continuous': 'partial_correlation',
         'test_for_discrete': 'conditional_mutual_information',
     }
     refutation = causal_model.refute_graph(k=1, independence_test=independence_tests)

     # Equality (not truthiness) is kept on purpose to preserve the check.
     assert refutation.refutation_result == True
示例#14
0
    def test_1(self):
        """ID algorithm on the single-edge graph T->Y must yield exactly P(Y|T)."""
        treatment = "T"
        outcome = "Y"
        causal_graph = "digraph{T->Y;}"
        # Build the column list explicitly: list("T") would split a string into
        # characters and only works by accident for one-letter names.
        columns = [treatment, outcome]
        df = pd.DataFrame(columns=columns)

        # Only identification is exercised, so an empty frame is sufficient.
        causal_model = CausalModel(df, treatment, outcome, graph=causal_graph)
        identified_estimand = causal_model.identify_effect(
            method_name="id-algorithm")

        # Only P(Y|T) should be present for test to succeed.
        identified_str = str(identified_estimand)
        gt_str = "Predictor: P(Y|T)"
        assert identified_str == gt_str
示例#15
0
    def test_2(self):
        """Backdoor search on a graph where X1 and X2 are descendants of T.

        No backdoor set is expected for
        ``digraph{T->X1;T->X2;X1->X2;X2->Y;T->Y}``.
        """
        treatment = "T"
        outcome = "Y"
        variables = ["X1", "X2"]
        causal_graph = "digraph{T->X1;T->X2;X1->X2;X2->Y;T->Y}"

        # Explicit list instead of list("T") (which splits a string into
        # characters), and a name that does not shadow the builtin ``vars``.
        columns = [treatment, outcome] + variables
        df = pd.DataFrame(columns=columns)

        treatment_name = parse_state(treatment)
        outcome_name = parse_state(outcome)

        # Causal model initialization
        causal_model = CausalModel(df, treatment, outcome, graph=causal_graph)

        # Causal identifier identification
        identifier = CausalIdentifier(causal_model._graph,
                                      estimand_type=None,
                                      method_name="default",
                                      proceed_when_unidentifiable=None)

        # Obtain backdoor sets
        path = Backdoor(identifier._graph._graph, treatment_name, outcome_name)
        backdoor_sets = path.get_backdoor_vars()

        assert len(backdoor_sets) == 0
示例#16
0
    def test_4(self):
        """ID algorithm on T->Y with X1 on a second directed path T->X1->Y.

        The estimand must print as a sum over X1 of P(Y|T,X1) and P(X1|T).
        """
        treatment = "T"
        outcome = "Y"
        variables = ["X1"]
        causal_graph = "digraph{T->Y;T->X1;X1->Y;}"
        # Build the column list explicitly: list("T") would split a string into
        # characters and only works by accident for one-letter names.
        columns = [treatment, outcome] + variables
        df = pd.DataFrame(columns=columns)

        # Only identification is exercised, so an empty frame is sufficient.
        causal_model = CausalModel(df, treatment, outcome, graph=causal_graph)
        identified_estimand = causal_model.identify_effect(
            method_name="id-algorithm")

        # Compare with ground truth
        identified_str = str(identified_estimand)
        gt_str = "Sum over {X1}:\n\tPredictor: P(Y|T,X1)\n\tPredictor: P(X1|T)"
        assert identified_str == gt_str
示例#17
0
    def test_2(self):
        '''
        Test a graph with edges in both directions between treatment and
        outcome (T->Y and Y->T): identification must raise.
        '''
        treatment = "T"
        outcome = "Y"
        causal_graph = "digraph{T->Y; Y->T;}"
        # Build the column list explicitly: list("T") would split a string into
        # characters and only works by accident for one-letter names.
        columns = [treatment, outcome]
        df = pd.DataFrame(columns=columns)

        # Only identification is exercised, so an empty frame is sufficient.
        causal_model = CausalModel(df, treatment, outcome, graph=causal_graph)

        # The two opposite edges form a cycle, so identify_effect must throw.
        # The return value is irrelevant and therefore not bound.
        with pytest.raises(Exception):
            causal_model.identify_effect(method_name="id-algorithm")
示例#18
0
    def predict_tutorial(self, data: pd.DataFrame):
        """Run the IHDP walkthrough: identify, estimate and refute, printing each step.

        NOTE: the ``data`` argument is not used; the IHDP CSV is always
        downloaded and replaces it. The parameter is kept so the signature
        stays compatible with existing callers.

        :param data: ignored (see note above).
        :returns: None; results are printed.
        """
        # https://towardsdatascience.com/implementing-causal-inference-a-key-step-towards-agi-de2cde8ea599
        data = pd.read_csv(
            'https://raw.githubusercontent.com/AMLab-Amsterdam/CEVAE/master/datasets/IHDP/csv/ihdp_npci_1.csv',
            header=None)

        # The file has no header row: five named columns followed by x1..x25.
        covariates = ['x' + str(i) for i in range(1, 26)]
        data.columns = [
            'treatment',
            'y_factual',
            'y_cfactual',
            'mu0',
            'mu1',
        ] + covariates
        data = data.astype({'treatment': 'bool'}, copy=False)

        # Create a causal model from the data and given common causes.
        # The names are passed as a list; building "x1+...+x25+" and splitting
        # on '+' left a trailing empty string among the common causes.
        model = CausalModel(data=data,
                            treatment='treatment',
                            outcome='y_factual',
                            common_causes=list(covariates))

        # Identify the causal effect
        identified_estimand = model.identify_effect()
        print(identified_estimand)

        # Estimate the causal effect and compare it with Average Treatment Effect
        estimate = model.estimate_effect(
            identified_estimand,
            method_name="backdoor.linear_regression",
            test_significance=True)
        print(estimate)
        print("Causal Estimate is " + str(estimate.value))

        refute_results = model.refute_estimate(
            identified_estimand, estimate, method_name="random_common_cause")
        print(refute_results)
示例#19
0
def dowhy_quick_backdoor_estimator(dataframe,
                                   outcome,
                                   treatment,
                                   cofounders_list,
                                   method_name,
                                   populaton_of_interest='ate',
                                   view_model=False):
    """
    Estimate a causal effect with DoWhy, declaring the given variables as common causes.

    :param dataframe: data handed to ``CausalModel`` as ``data``
    :param outcome: outcome handed to ``CausalModel``
    :param treatment: treatment handed to ``CausalModel``
    :param cofounders_list: variables handed to ``CausalModel`` as ``common_causes``
    :param method_name: estimator name forwarded to ``estimate_effect``
    :param populaton_of_interest: forwarded to ``estimate_effect`` as ``target_units`` (default ``'ate'``)
    :param view_model: when truthy, ``view_model(layout="dot")`` is called before identification
    :returns: the ``value`` attribute of the resulting estimate
    """
    causal_model = CausalModel(data=dataframe,
                               treatment=treatment,
                               outcome=outcome,
                               common_causes=cofounders_list)
    if view_model:
        causal_model.view_model(layout="dot")
    identified_estimand = causal_model.identify_effect(
        proceed_when_unidentifiable=True)
    # confidence_intervals=True is intentionally not passed (not in this release).
    causal_estimate = causal_model.estimate_effect(
        identified_estimand,
        method_name=method_name,
        target_units=populaton_of_interest,
    )
    return causal_estimate.value
示例#20
0
    def predict_example(self, data: pd.DataFrame):
        """Run identify, estimate and refute on ``data`` with E1, E2 as treatments and E3 as outcome.

        Nothing is returned; the refutation result is discarded.
        """
        # https://github.com/Microsoft/dowhy
        # https://ntanmayee.github.io/articles/2018/11/16/tools-for-causality.html
        treatment_names = ['E1', 'E2']
        outcome_name = 'E3'

        causal_model = CausalModel(
            data=data,
            treatment=treatment_names,
            outcome=outcome_name,
            proceed_when_unidentifiable=True,
        )

        # Step 1: target estimand.
        estimand = causal_model.identify_effect()

        # Step 2: statistical estimate of that estimand.
        effect = causal_model.estimate_effect(
            estimand,
            method_name="backdoor.propensity_score_matching",
        )

        # Step 3: robustness check of the estimate.
        causal_model.refute_estimate(
            estimand,
            effect,
            method_name="random_common_cause",
        )
示例#21
0
    def null_refutation_test(self, data=None, dataset="linear", beta=10,
            num_common_causes=1, num_instruments=1, num_samples=100000,
            treatment_is_binary=True):
        """Check that a refuter with zero effect strengths leaves the estimate unchanged.

        The refuter is first run with the strengths configured on ``self``
        (only to surface exceptions), then with both strengths set to 0. The
        second result must stay within
        ``abs(estimate) * self._error_tolerance`` of the original estimate.
        ``dataset`` is accepted but not read by this method.
        """
        # A caller-provided dataset object takes precedence over generation.
        if data is None:
            data = dowhy.datasets.linear_dataset(
                beta=beta,
                num_common_causes=num_common_causes,
                num_instruments=num_instruments,
                num_samples=num_samples,
                treatment_is_binary=treatment_is_binary,
            )

        causal_model = CausalModel(
            data=data['df'],
            treatment=data["treatment_name"],
            outcome=data["outcome_name"],
            graph=data["gml_graph"],
            proceed_when_unidentifiable=True,
            test_significance=None,
        )
        estimand = causal_model.identify_effect()
        original = causal_model.estimate_effect(
            identified_estimand=estimand,
            method_name=self.estimator_method,
            test_significance=None,
        )
        self.logger.debug(data["ate"])

        # Run with the configured strengths to check for exceptions.
        configured_run = causal_model.refute_estimate(
            estimand,
            original,
            method_name=self.refuter_method,
            confounders_effect_on_treatment=self.confounders_effect_on_t,
            confounders_effect_on_outcome=self.confounders_effect_on_y,
            effect_strength_on_treatment=self.effect_strength_on_t,
            effect_strength_on_outcome=self.effect_strength_on_y,
        )
        self.logger.debug(configured_run.new_effect)

        # Run with zero strengths: the estimate is expected to stay the same.
        null_run = causal_model.refute_estimate(
            estimand,
            original,
            method_name=self.refuter_method,
            confounders_effect_on_treatment=self.confounders_effect_on_t,
            confounders_effect_on_outcome=self.confounders_effect_on_y,
            effect_strength_on_treatment=0,
            effect_strength_on_outcome=0,
        )
        deviation = abs(null_run.new_effect - original.value)

        message = "Error in refuted estimate = {0} with tolerance {1}%. Estimated={2},After Refutation={3}"
        print(message.format(
            deviation, self._error_tolerance * 100, original.value, null_run.new_effect))

        within_tolerance = deviation < abs(original.value) * self._error_tolerance
        assert within_tolerance
示例#22
0
    def test_1(self):
        """Backdoor search on a confounded graph must return a d-separating set.

        The first backdoor set found for
        ``digraph{X1->T;X2->T;X1->X2;X2->Y;T->Y}`` is validated against the
        graph's backdoor paths with the "naive" d-separation algorithm.
        """
        treatment = "T"
        outcome = "Y"
        variables = ["X1", "X2"]
        causal_graph = "digraph{X1->T;X2->T;X1->X2;X2->Y;T->Y}"

        # Explicit list instead of list("T") (which splits a string into
        # characters), and a name that does not shadow the builtin ``vars``.
        columns = [treatment, outcome] + variables
        df = pd.DataFrame(columns=columns)

        treatment_name = parse_state(treatment)
        outcome_name = parse_state(outcome)

        # Causal model initialization
        causal_model = CausalModel(df, treatment, outcome, graph=causal_graph)

        # Causal identifier identification
        identifier = CausalIdentifier(causal_model._graph,
                                      estimand_type=None,
                                      method_name="default",
                                      proceed_when_unidentifiable=None)

        # Obtain backdoor sets
        path = Backdoor(identifier._graph._graph, treatment_name, outcome_name)
        backdoor_sets = path.get_backdoor_vars()
        print(backdoor_sets)
        # Check if backdoor sets are valid i.e. if they block all paths between the treatment and the outcome
        backdoor_paths = identifier._graph.get_backdoor_paths(
            treatment_name, outcome_name)
        check_set = set(backdoor_sets[0]['backdoor_set'])
        check = identifier._graph.check_valid_backdoor_set(
            treatment_name,
            outcome_name,
            check_set,
            backdoor_paths=backdoor_paths,
            dseparation_algo="naive")
        print(check)
        assert check["is_dseparated"]
示例#23
0
def att_causal_estimator(df,
                         outcome,
                         treatment,
                         cofounders_list,
                         method_name,
                         view_model=False):
    """Return the value of a causal estimate computed with ``target_units='att'``.

    Args:
        df: data passed to ``CausalModel`` as ``data``.
        outcome: outcome specification passed to ``CausalModel``.
        treatment: treatment specification passed to ``CausalModel``.
        cofounders_list: passed to ``CausalModel`` as ``common_causes``.
        method_name: estimator name forwarded to ``estimate_effect``.
        view_model: when truthy, ``view_model(layout="dot")`` is called on the
            model before identification.

    Returns:
        The ``value`` attribute of the estimate returned by ``estimate_effect``.
    """
    model = CausalModel(
        data=df,
        treatment=treatment,
        outcome=outcome,
        common_causes=cofounders_list,
    )

    if view_model:
        model.view_model(layout="dot")

    estimand = model.identify_effect(proceed_when_unidentifiable=True)
    estimate = model.estimate_effect(
        estimand,
        method_name=method_name,
        target_units='att',
    )
    return estimate.value
示例#24
0
 def test_backdoor_estimators(self):
     """Smoke-test three EconML backdoor estimators through ``estimate_effect``.

     LinearDML and ContinuousTreatmentOrthoForest run on a dataset generated
     with ``treatment_is_binary=False``; LinearDRLearner runs on a second
     dataset generated with binary treatment and outcome. Nothing is asserted
     about the estimates: the test passes when no call raises.
     """
     # Setup data
     data = datasets.linear_dataset(10,
                                    num_common_causes=4,
                                    num_samples=10000,
                                    num_instruments=2,
                                    num_effect_modifiers=2,
                                    num_treatments=1,
                                    treatment_is_binary=False)
     model = CausalModel(data=data["df"],
                         treatment=data["treatment_name"],
                         outcome=data["outcome_name"],
                         effect_modifiers=data["effect_modifier_names"],
                         graph=data["gml_graph"])
     identified_estimand = model.identify_effect(
         proceed_when_unidentifiable=True)
     # Test LinearDML
     dml_estimate = model.estimate_effect(
         identified_estimand,
         method_name="backdoor.econml.dml.LinearDML",
         control_value=0,
         treatment_value=1,
         target_units=lambda df: df["X0"] > 1,  # condition used for CATE
         method_params={
             "init_params": {
                 'model_y': GradientBoostingRegressor(),
                 'model_t': GradientBoostingRegressor(),
                 'featurizer': PolynomialFeatures(degree=1,
                                                  include_bias=True)
             },
             "fit_params": {}
         })
     # Test ContinuousTreatmentOrthoForest
     orthoforest_estimate = model.estimate_effect(
         identified_estimand,
         method_name=
         "backdoor.econml.ortho_forest.ContinuousTreatmentOrthoForest",
         target_units=lambda df: df["X0"] > 2,
         method_params={
             "init_params": {
                 'n_trees': 10
             },
             "fit_params": {}
         })
     # Test LinearDRLearner
     data_binary = datasets.linear_dataset(10,
                                           num_common_causes=4,
                                           num_samples=10000,
                                           num_instruments=2,
                                           num_effect_modifiers=2,
                                           treatment_is_binary=True,
                                           outcome_is_binary=True)
     # Every model input, including the effect modifier names, comes from
     # data_binary so the model never mixes metadata from two datasets.
     model_binary = CausalModel(
         data=data_binary["df"],
         treatment=data_binary["treatment_name"],
         outcome=data_binary["outcome_name"],
         effect_modifiers=data_binary["effect_modifier_names"],
         graph=data_binary["gml_graph"])
     identified_estimand_binary = model_binary.identify_effect(
         proceed_when_unidentifiable=True)
     drlearner_estimate = model_binary.estimate_effect(
         identified_estimand_binary,
         method_name="backdoor.econml.drlearner.LinearDRLearner",
         target_units=lambda df: df["X0"] > 1,
         confidence_intervals=False,
         method_params={
             "init_params": {
                 'model_propensity':
                 LogisticRegressionCV(cv=3,
                                      solver='lbfgs',
                                      multi_class='auto')
             },
             "fit_params": {}
         })
示例#25
0
 def test_iv_estimators(self):
     """Smoke-test the EconML DeepIV estimator through ``estimate_effect``.

     Nothing is asserted about the estimate; the test passes when the call
     completes without raising.
     """
     # Setup data
     data = datasets.linear_dataset(10,
                                    num_common_causes=4,
                                    num_samples=10000,
                                    num_instruments=2,
                                    num_effect_modifiers=2,
                                    num_treatments=1,
                                    treatment_is_binary=False)
     frame = data['df']
     model = CausalModel(data=frame,
                         treatment=data["treatment_name"],
                         outcome=data["outcome_name"],
                         effect_modifiers=data["effect_modifier_names"],
                         graph=data["gml_graph"])
     identified_estimand = model.identify_effect(
         proceed_when_unidentifiable=True)

     # Test DeepIV
     n_modifiers = len(model._effect_modifiers)
     dims_zx = len(model._instruments) + n_modifiers  # sum of dims of Z and X
     dims_tx = len(model._treatment) + n_modifiers  # sum of dims of T and X

     def hidden_layers(input_width):
         # Dense/Dropout stack shared by the treatment and response networks.
         return [
             keras.layers.Dense(128,
                                activation='relu',
                                input_shape=(input_width, )),
             keras.layers.Dropout(0.17),
             keras.layers.Dense(64, activation='relu'),
             keras.layers.Dropout(0.17),
             keras.layers.Dense(32, activation='relu'),
             keras.layers.Dropout(0.17),
         ]

     treatment_model = keras.Sequential(hidden_layers(dims_zx))
     response_model = keras.Sequential(
         hidden_layers(dims_tx) + [keras.layers.Dense(1)])

     def treatment_net(z, x):
         # Treatment model
         return treatment_model(keras.layers.concatenate([z, x]))

     def response_net(t, x):
         # Response model
         return response_model(keras.layers.concatenate([t, x]))

     init_params = {
         'n_components': 10,  # Number of gaussians in the mixture density networks
         'm': treatment_net,
         "h": response_net,
         'n_samples': 1,  # Number of samples used to estimate the response
         'first_stage_options': {'epochs': 25},
         'second_stage_options': {'epochs': 25},
     }
     deepiv_estimate = model.estimate_effect(
         identified_estimand,
         method_name="iv.econml.deepiv.DeepIVEstimator",
         target_units=lambda df: df["X0"] > -1,
         confidence_intervals=False,
         method_params={"init_params": init_params, "fit_params": {}})
示例#26
0
文件: base.py 项目: zihanmok/dowhy
    def average_treatment_effect_test(self, dataset="linear", beta=10,
            num_common_causes=1, num_instruments=1,
            num_effect_modifiers=0, num_treatments=1,
            num_frontdoor_variables = 0,
            num_samples=100000,
            treatment_is_binary=True,
            outcome_is_binary=False,
            confidence_intervals=False,
            test_significance=False,
            method_params=None):
        """Estimate the ATE on a generated dataset and compare it with ``data["ate"]``.

        The dataset comes from ``dowhy.datasets.linear_dataset`` when
        ``dataset == "linear"`` and from ``dowhy.datasets.simple_iv_dataset``
        when ``dataset == "simple-iv"``. The estimator class is read from
        ``self._Estimator``, the identifier method from
        ``self._identifier_method`` and the relative tolerance from
        ``self._error_tolerance``.

        The test asserts that the absolute error of the estimate is below
        ``abs(true_ate) * self._error_tolerance``. When requested, it then
        calls the confidence-interval, standard-error and significance-test
        methods of the estimate.

        Raises:
            ValueError: if ``dataset`` is neither ``"linear"`` nor ``"simple-iv"``.
        """
        if dataset not in ("linear", "simple-iv"):
            raise ValueError("Dataset type not supported.")

        # Arguments accepted by both dataset generators.
        shared_args = dict(
            beta=beta,
            num_treatments=num_treatments,
            num_samples=num_samples,
            treatment_is_binary=treatment_is_binary,
            outcome_is_binary=outcome_is_binary,
        )
        if dataset == "linear":
            data = dowhy.datasets.linear_dataset(
                num_common_causes=num_common_causes,
                num_instruments=num_instruments,
                num_effect_modifiers=num_effect_modifiers,
                num_frontdoor_variables=num_frontdoor_variables,
                **shared_args)
        else:
            data = dowhy.datasets.simple_iv_dataset(**shared_args)

        frame = data['df']
        treatment_name = data["treatment_name"]
        outcome_name = data["outcome_name"]

        model = CausalModel(
            data=frame,
            treatment=treatment_name,
            outcome=outcome_name,
            graph=data["gml_graph"],
            proceed_when_unidentifiable=True,
            test_significance=test_significance,
        )
        target_estimand = model.identify_effect()
        target_estimand.set_identifier_method(self._identifier_method)

        estimator_ate = self._Estimator(
            frame,
            identified_estimand=target_estimand,
            treatment=treatment_name,
            outcome=outcome_name,
            control_value=0,
            treatment_value=1,
            test_significance=test_significance,
            evaluate_effect_strength=False,
            confidence_intervals=confidence_intervals,
            target_units="ate",
            effect_modifiers=data["effect_modifier_names"],
            params=method_params,
        )
        true_ate = data["ate"]
        ate_estimate = estimator_ate.estimate_effect()
        str(ate_estimate)  # checking if str output is correctly created

        error = abs(ate_estimate.value - true_ate)
        report = "Error in ATE estimate = {0} with tolerance {1}%. Estimated={2},True={3}".format(
            error, self._error_tolerance * 100, ate_estimate.value, true_ate)
        print(report)
        assert error < abs(true_ate) * self._error_tolerance

        # Compute confidence intervals, standard error and significance tests
        if confidence_intervals:
            ate_estimate.get_confidence_intervals()
            ate_estimate.get_confidence_intervals(confidence_level=0.99)
            ate_estimate.get_confidence_intervals(method="bootstrap")
            ate_estimate.get_standard_error()
            ate_estimate.get_standard_error(method="bootstrap")
        if test_significance:
            ate_estimate.test_stat_significance()
            ate_estimate.test_stat_significance(method="bootstrap")
示例#27
0
import pandas as pd
import dowhy.datasets
from datasets import *
from dowhy import CausalModel

# Load the credit dataset. get_credit is not defined in this script, so it
# presumably comes from the star import of the local `datasets` module — verify.
credit_data = get_credit()

# Build the causal model from the dataset's "df" and "dot_graph" entries, with
# YearsEmployed as the treatment and Approved as the outcome.
model = CausalModel(
    data=credit_data["df"],
    treatment=["YearsEmployed"],
    outcome=["Approved"],
    graph=credit_data["dot_graph"],
)

# NOTE(review): LogisticRegressionCV is not referenced in the visible part of
# this script; confirm whether this mid-file import is still needed.
from sklearn.linear_model import LogisticRegressionCV

# Saves the model as "causal_model.png"
model.view_model(layout="dot")
# Identify the estimand, passing proceed_when_unidentifiable=True.
identified_estimand_binary = model.identify_effect(
    proceed_when_unidentifiable=True)
# estimate = model.estimate_effect(identified_estimand, method_name="backdoor.econml.drlearner.LinearDRLearner")

orthoforest_estimate = model.estimate_effect(
    identified_estimand_binary,
    method_name="backdoor.econml.ortho_forest.ContinuousTreatmentOrthoForest",
    target_units=lambda df: df["Male"] == 1,
    confidence_intervals=False,
    method_params={
        "init_params": {
            'n_trees':
            2,  # not ideal, just as an example to speed up computation
示例#28
0
 def test_graph_refutation2(self, num_variables,num_samples):
     """Expect ``refute_graph`` to report ``False`` for a hand-written graph.

     Data is generated with ``dowhy.datasets.dataset_from_random_graph``, while
     the model is given the fixed GML graph below (ten nodes labelled a-j,
     eight directed edges). ``refute_graph`` is run with ``k = 2``, and the
     test asserts that ``refutation_result`` equals ``False``.
     """
     generated = dowhy.datasets.dataset_from_random_graph(
         num_vars=num_variables, num_samples=num_samples)
     frame = generated["df"]
     gml_str = """
     graph [
     directed 1
     node [
         id 0
         label "a"
     ]
     node [
         id 1
         label "b"
     ]
     node [
         id 2
         label "c"
     ]
     node [
         id 3
         label "d"
     ]
     node [
         id 4
         label "e"
     ]
     node [
         id 5
         label "f"
     ]
     node [
         id 6
         label "g"
     ]
     node [
         id 7
         label "h"
     ]
     node [
         id 8
         label "i"
     ]
     node [
         id 9
         label "j"
     ]
     edge [
         source 0
         target 1
     ]
     edge [
         source 0
         target 3
     ]
     edge [
         source 3
         target 2
     ]
     edge [
         source 7
         target 4
     ]
     edge [
         source 6
         target 5
     ]
     edge [
         source 7
         target 8
     ]
     edge [
         source 9
         target 2
     ]
     edge [
         source 9
         target 8
     ]
     ]
     """
     # Conditional-independence tests to use for each variable type.
     independence_tests = {
         'test_for_continuous': 'partial_correlation',
         'test_for_discrete': 'conditional_mutual_information',
     }
     causal_model = CausalModel(
         data=frame,
         treatment=generated["treatment_name"],
         outcome=generated["outcome_name"],
         graph=gml_str,
     )
     refutation = causal_model.refute_graph(
         k=2, independence_test=independence_tests)
     # Equality (not identity or truthiness) is kept on purpose so that
     # non-bool results compare exactly as before.
     assert refutation.refutation_result == False
示例#29
0
    def null_refutation_test(self,
                             data=None,
                             dataset="linear",
                             beta=10,
                             num_common_causes=1,
                             num_instruments=1,
                             num_samples=100000,
                             treatment_is_binary=True):
        """Run the refuter named by ``self.refuter_method`` and check its new effect.

        An estimate is first obtained with ``self.estimator_method``; the
        refuter is then applied and its ``new_effect`` checked:

        * ``add_unobserved_common_cause``: run once with the configured effect
          strengths (only to surface exceptions), then with zero strengths,
          where the new effect must stay within the relative tolerance of the
          original estimate.
        * ``placebo_treatment_refuter`` and ``dummy_outcome_refuter``: the new
          effect must be within ``self._error_tolerance`` of zero.
        * ``data_subset_refuter`` and ``bootstrap_refuter``: the new effect
          must stay within the relative tolerance of the original estimate.

        Any other ``self.refuter_method`` value results in no refutation being
        run and no assertion being made.

        Args:
            data: optional dataset object (a mapping with ``"df"``,
                ``"treatment_name"``, ``"outcome_name"``, ``"gml_graph"`` and
                ``"ate"`` entries). When ``None``, one is generated with
                ``dowhy.datasets.linear_dataset``.
            dataset: accepted for interface compatibility; this method does
                not read it.
            beta, num_common_causes, num_instruments, num_samples:
                forwarded to ``linear_dataset`` when ``data`` is ``None``.
            treatment_is_binary: forwarded to ``linear_dataset``; when it is
                ``True`` the placebo, subset and bootstrap refuters are run
                with an explicit ``num_simulations``.

        Raises:
            AssertionError: if the refuted effect is outside the tolerance.
        """
        # Supports user-provided dataset object
        if data is None:
            data = dowhy.datasets.linear_dataset(
                beta=beta,
                num_common_causes=num_common_causes,
                num_instruments=num_instruments,
                num_samples=num_samples,
                treatment_is_binary=treatment_is_binary)

        print(data['df'])

        print("")
        model = CausalModel(data=data['df'],
                            treatment=data["treatment_name"],
                            outcome=data["outcome_name"],
                            graph=data["gml_graph"],
                            proceed_when_unidentifiable=True,
                            test_significance=None)
        target_estimand = model.identify_effect()
        ate_estimate = model.estimate_effect(
            identified_estimand=target_estimand,
            method_name=self.estimator_method,
            test_significance=None)
        true_ate = data["ate"]
        self.logger.debug(true_ate)

        method = self.refuter_method

        def run_refuter(**extra):
            # Apply the configured refuter to the estimate obtained above.
            return model.refute_estimate(target_estimand,
                                         ate_estimate,
                                         method_name=method,
                                         **extra)

        def simulation_args(count):
            # num_simulations is only set explicitly for binary treatments;
            # otherwise the refuter's own default is used.
            if treatment_is_binary is True:
                return {"num_simulations": count}
            return {}

        def check_close_to_estimate(refutation, template, show_refutation):
            # The refuted effect must stay within the relative tolerance of
            # the original estimate.
            error = abs(refutation.new_effect - ate_estimate.value)
            print(
                template.format(error, self._error_tolerance * 100,
                                ate_estimate.value, refutation.new_effect))
            if show_refutation:
                print(refutation)
            assert error < abs(ate_estimate.value) * self._error_tolerance

        def check_close_to_zero(refutation):
            # The expected value is hardcoded to zero because this runs on a
            # linear dataset, where the refuted effect should vanish.
            expected_value = 0
            error = abs(refutation.new_effect - expected_value)
            print(
                "Error in the refuted estimate = {0} with tolerence {1}%. Expected Value={2}, After Refutation={3}"
                .format(error, self._error_tolerance * 100, expected_value,
                        refutation.new_effect))
            print(refutation)
            assert error < self._error_tolerance

        if method == "add_unobserved_common_cause":
            confounder_args = dict(
                confounders_effect_on_treatment=self.confounders_effect_on_t,
                confounders_effect_on_outcome=self.confounders_effect_on_y)

            # To test if there are any exceptions
            ref = run_refuter(
                effect_strength_on_treatment=self.effect_strength_on_t,
                effect_strength_on_outcome=self.effect_strength_on_y,
                **confounder_args)
            self.logger.debug(ref.new_effect)

            # To test if the estimate is identical if refutation parameters are zero
            refute = run_refuter(effect_strength_on_treatment=0,
                                 effect_strength_on_outcome=0,
                                 **confounder_args)
            check_close_to_estimate(
                refute,
                "Error in refuted estimate = {0} with tolerance {1}%. Estimated={2},After Refutation={3}",
                show_refutation=False)

        elif method == "placebo_treatment_refuter":
            check_close_to_zero(run_refuter(**simulation_args(10)))

        elif method in ("data_subset_refuter", "bootstrap_refuter"):
            # Both refuters are run and verified in exactly the same way.
            check_close_to_estimate(
                run_refuter(**simulation_args(5)),
                "Error in the refuted estimate = {0} with tolerence {1}%. Estimated={2}, After Refutation={3}",
                show_refutation=True)

        elif method == "dummy_outcome_refuter":
            extra = {}
            if self.transformations is not None:
                extra = {
                    "transformations": self.transformations,
                    "params": self.params,
                }
            ref = run_refuter(num_simulations=2, **extra)
            # The check runs for both configurations and asserts the computed
            # error, matching the placebo branch. Previously it was skipped
            # when no transformations were set and otherwise only asserted
            # that the refutation object was truthy.
            check_close_to_zero(ref)
示例#30
0
 def test_iv_estimators(self):
     """Smoke-test the DeepIV and LinearIntentToTreatDRIV estimators.

     The test is skipped when ``keras`` cannot be imported. Nothing is
     asserted about the estimates; the test passes when both
     ``estimate_effect`` calls complete without raising.
     """
     keras = pytest.importorskip("keras")

     # Setup data
     data = datasets.linear_dataset(10,
                                    num_common_causes=4,
                                    num_samples=10000,
                                    num_instruments=2,
                                    num_effect_modifiers=2,
                                    num_treatments=1,
                                    treatment_is_binary=False)
     frame = data['df']
     model = CausalModel(data=frame,
                         treatment=data["treatment_name"],
                         outcome=data["outcome_name"],
                         effect_modifiers=data["effect_modifier_names"],
                         graph=data["gml_graph"])
     identified_estimand = model.identify_effect(
         proceed_when_unidentifiable=True)

     # Test DeepIV
     n_modifiers = len(model._effect_modifiers)
     dims_zx = len(model._instruments) + n_modifiers  # sum of dims of Z and X
     dims_tx = len(model._treatment) + n_modifiers  # sum of dims of T and X

     def hidden_layers(input_width):
         # Dense/Dropout stack shared by the treatment and response networks.
         return [
             keras.layers.Dense(128,
                                activation='relu',
                                input_shape=(input_width, )),
             keras.layers.Dropout(0.17),
             keras.layers.Dense(64, activation='relu'),
             keras.layers.Dropout(0.17),
             keras.layers.Dense(32, activation='relu'),
             keras.layers.Dropout(0.17),
         ]

     treatment_model = keras.Sequential(hidden_layers(dims_zx))
     response_model = keras.Sequential(
         hidden_layers(dims_tx) + [keras.layers.Dense(1)])

     def treatment_net(z, x):
         # Treatment model
         return treatment_model(keras.layers.concatenate([z, x]))

     def response_net(t, x):
         # Response model
         return response_model(keras.layers.concatenate([t, x]))

     deepiv_init_params = {
         'n_components': 10,  # Number of gaussians in the mixture density networks
         'm': treatment_net,
         "h": response_net,
         'n_samples': 1,  # Number of samples used to estimate the response
         'first_stage_options': {'epochs': 25},
         'second_stage_options': {'epochs': 25},
     }
     deepiv_estimate = model.estimate_effect(
         identified_estimand,
         method_name="iv.econml.deepiv.DeepIVEstimator",
         target_units=lambda df: df["X0"] > -1,
         confidence_intervals=False,
         method_params={
             "init_params": deepiv_init_params,
             "fit_params": {}
         })

     # Test IntentToTreatDRIV
     driv_data = datasets.linear_dataset(10,
                                         num_common_causes=4,
                                         num_samples=10000,
                                         num_instruments=1,
                                         num_effect_modifiers=2,
                                         num_treatments=1,
                                         treatment_is_binary=True,
                                         num_discrete_instruments=1)
     driv_frame = driv_data['df']
     driv_model = CausalModel(
         data=driv_frame,
         treatment=driv_data["treatment_name"],
         outcome=driv_data["outcome_name"],
         effect_modifiers=driv_data["effect_modifier_names"],
         graph=driv_data["gml_graph"])
     driv_estimand = driv_model.identify_effect(
         proceed_when_unidentifiable=True)
     driv_init_params = {
         'model_T_XZ': GradientBoostingClassifier(),
         'model_Y_X': GradientBoostingRegressor(),
         'flexible_model_effect': GradientBoostingRegressor(),
         'featurizer': PolynomialFeatures(degree=1, include_bias=False),
     }
     driv_estimate = driv_model.estimate_effect(
         driv_estimand,
         method_name="iv.econml.ortho_iv.LinearIntentToTreatDRIV",
         target_units=lambda df: df["X0"] > 1,
         confidence_intervals=False,
         method_params={
             "init_params": driv_init_params,
             "fit_params": {}
         })