Пример #1
0
    def average_treatment_effect_test(self,
                                      dataset="linear",
                                      beta=10,
                                      num_common_causes=1,
                                      num_instruments=1,
                                      num_samples=10000,
                                      treatment_is_binary=True):
        """Assert that ``self._Estimator`` recovers the known ATE of a synthetic dataset.

        A dataset is generated with ``dowhy.datasets.linear_dataset``, the effect
        is identified from the dataset's GML graph, and the estimate is compared
        with the dataset's ``"ate"`` entry.

        :param dataset: currently unused; the data always comes from ``linear_dataset``.
        :param beta: forwarded to ``linear_dataset``.
        :param num_common_causes: forwarded to ``linear_dataset``.
        :param num_instruments: forwarded to ``linear_dataset``.
        :param num_samples: forwarded to ``linear_dataset``.
        :param treatment_is_binary: forwarded to ``linear_dataset``.
        :raises AssertionError: if the absolute error is not below
            ``abs(true_ate) * self._error_tolerance``.
        """
        data = dowhy.datasets.linear_dataset(
            beta=beta,
            num_common_causes=num_common_causes,
            num_instruments=num_instruments,
            num_samples=num_samples,
            treatment_is_binary=treatment_is_binary)

        model = CausalModel(data=data['df'],
                            treatment=data["treatment_name"],
                            outcome=data["outcome_name"],
                            graph=data["gml_graph"],
                            proceed_when_unidentifiable=True,
                            test_significance=None)
        target_estimand = model.identify_effect()
        estimator_ate = self._Estimator(data['df'],
                                        identified_estimand=target_estimand,
                                        treatment=data["treatment_name"],
                                        outcome=data["outcome_name"],
                                        test_significance=None)
        true_ate = data["ate"]
        ate_estimate = estimator_ate.estimate_effect()
        # The signed error is kept for the report; the check below uses its magnitude.
        error = ate_estimate.value - true_ate
        print(
            "Error in ATE estimate = {0} with tolerance {1}%. Estimated={2},True={3}"
            .format(error, self._error_tolerance * 100, ate_estimate.value,
                    true_ate))
        # Compare magnitudes: a signed comparison accepts every under-estimate
        # and is meaningless when the true ATE is negative.
        assert abs(error) < abs(true_ate) * self._error_tolerance
 def do(self, x, method='weighting', num_cores=1, variable_types=None, outcome=None, params=None, dot_graph=None,
        common_causes=None, instruments=None, estimand_type='ate', proceed_when_unidentifiable=False,
        stateful=False):
     """Sample from the interventional distribution using a do-sampler.

     ``x`` is normalised by ``self.parse_x``. A ``CausalModel`` and a sampler
     are built when none is cached, and cached state is dropped unless
     ``stateful`` is true and ``method`` matches the previous call.

     :param x: the intervention, in any form accepted by ``parse_x``; only its
         first key is passed to ``CausalModel`` as the treatment.
     :param method: sampler name; the class is looked up as ``method + "_sampler"``.
     :param num_cores: forwarded to the sampler.
     :param variable_types: dict forwarded to the sampler; ``None`` means an empty dict.
     :param outcome: outcome passed to ``CausalModel``.
     :param params: forwarded to the sampler.
     :param dot_graph: passed to ``CausalModel`` as ``graph``.
     :param common_causes: passed to ``CausalModel``.
     :param instruments: passed to ``CausalModel``.
     :param estimand_type: passed to ``CausalModel``.
     :param proceed_when_unidentifiable: passed to ``CausalModel``.
     :param stateful: keep the model and sampler on ``self`` after the call.
     :return: whatever the sampler's ``do_sample(x)`` returns.
     """
     # A fresh dict per call: a `{}` default would be shared by every call.
     if variable_types is None:
         variable_types = {}
     x, keep_original_treatment = self.parse_x(x)
     if not stateful or method != self._method:
         self.reset()
     if not self._causal_model:
         self._causal_model = CausalModel(self._obj,
                                          list(x)[0],
                                          outcome,
                                          graph=dot_graph,
                                          common_causes=common_causes,
                                          instruments=instruments,
                                          estimand_type=estimand_type,
                                          proceed_when_unidentifiable=proceed_when_unidentifiable)
     self._identified_estimand = self._causal_model.identify_effect()
     if not self._sampler:
         self._method = method
         do_sampler_class = do_samplers.get_class_object(method + "_sampler")
         self._sampler = do_sampler_class(self._obj,
                                          self._identified_estimand,
                                          self._causal_model._treatment,
                                          self._causal_model._outcome,
                                          params=params,
                                          variable_types=variable_types,
                                          num_cores=num_cores,
                                          causal_model=self._causal_model,
                                          keep_original_treatment=keep_original_treatment)
     result = self._sampler.do_sample(x)
     if not stateful:
         self.reset()
     return result
Пример #3
0
    def model(self, force_again=False):
        """Return the cached ``CausalModel``, building it when needed.

        :param force_again: rebuild the model even if one is already cached.
        :return: the model stored in ``self.t_model``.
        """
        # Fast path: a model exists and no rebuild was requested.
        if self.t_model is not None and not force_again:
            return self.t_model

        self.t_model = CausalModel(
            data=self.data,
            treatment='TOJ',
            outcome='IntCur',
            graph=self.gml_graph,
        )
        return self.t_model
Пример #4
0
    def plot(self, *args, **kwargs):
        """Run a causal estimate for the wrapped object, then delegate to its ``plot``.

        Recognised keyword arguments:

        - ``method_name``: estimation method; defaults to
          ``"backdoor.propensity_score_matching"``.
        - ``common_causes`` or ``dot_graph``: how the backdoor set is
          determined; ``common_causes`` wins when both are given.
        - ``treatment_name`` / ``outcome_name``: column keys into ``self._obj``.
        - ``kind``: ``'bar'``, ``'line'`` or absent/empty.

        :raises Exception: if neither ``common_causes`` nor ``dot_graph`` is
            given, or if ``kind`` is not supported.
        """
        method_name = kwargs.get('method_name') or "backdoor.propensity_score_matching"
        logging.info("Using {} for estimation.".format(method_name))

        if kwargs.get('common_causes'):
            self.use_graph = False
        elif kwargs.get('dot_graph'):
            self.use_graph = True
        else:
            raise Exception("You must specify a method for determining a backdoor set.")

        # NOTE(review): treatment/outcome are passed as column data rather than
        # column names — confirm this is what CausalModel expects.
        # The graph / common causes arrive as keyword arguments; `args` is the
        # positional tuple and cannot be indexed by name.
        if self.use_graph:
            model = CausalModel(data=self._obj,
                                treatment=self._obj[kwargs["treatment_name"]],
                                outcome=self._obj[kwargs["outcome_name"]],
                                graph=kwargs["dot_graph"])
        else:
            model = CausalModel(data=self._obj,
                                treatment=self._obj[kwargs["treatment_name"]],
                                outcome=self._obj[kwargs["outcome_name"]],
                                common_causes=kwargs["common_causes"])

        # A missing or empty `kind` is treated like 'line'.
        kind = kwargs.get('kind')
        if kind == 'bar' or kind == 'line' or not kind:
            identified_estimand = model.identify_effect()
            # The estimate is computed but not yet used by the plot below.
            model.estimate_effect(identified_estimand,
                                  method_name=method_name)
        else:
            raise Exception("Plot type {} not supported for causal plots!".format(kind))
        # NOTE(review): every keyword, including the causal-only ones above, is
        # forwarded unchanged — confirm the underlying plot accepts them.
        self._obj.plot(*args, **kwargs)
Пример #5
0
def CalDoWhy(dat):
    """Estimate the causal effect for ``dat`` with backdoor linear regression.

    :param dat: mapping providing ``"df"``, ``"treatment_name"``,
        ``"outcome_name"`` and ``"gml_graph"``.
    :return: the object returned by ``CausalModel.estimate_effect``.
    """
    causal_model = CausalModel(
        data=dat["df"],
        treatment=dat["treatment_name"],
        outcome=dat["outcome_name"],
        graph=dat["gml_graph"],
    )

    # Identify first, then estimate the identified estimand.
    estimand = causal_model.identify_effect()
    return causal_model.estimate_effect(
        estimand, method_name="backdoor.linear_regression")
Пример #6
0
def CalPSR(dat):
    """Return the treatment coefficient of a regression of outcome on treatment and a fitted score.

    The common causes are read from the model's graph (dropping ``"U"`` when
    present). A linear regression of treatment on them gives a predicted score
    "PS", and the outcome is then regressed on ``["Treatment", "PS"]``.

    :param dat: mapping providing ``"df"``, ``"treatment_name"``,
        ``"outcome_name"`` and ``"gml_graph"``.
    :return: the first coefficient of the second regression (the "Treatment" column).
    """
    causal_model = CausalModel(
        data=dat["df"],
        treatment=dat["treatment_name"],
        outcome=dat["outcome_name"],
        graph=dat["gml_graph"],
    )

    t_name = causal_model._treatment
    y_name = causal_model._outcome
    confounder_names = causal_model._graph.get_common_causes(t_name, y_name)

    frame = dat["df"]
    t_values = frame[t_name]
    y_values = frame[y_name]

    # "U" is removed from the name list before it is used to select columns.
    if "U" in confounder_names:
        confounder_names.remove("U")
    confounders = frame[confounder_names]

    # Stage 1: predict the treatment from the common causes.
    score = LinearRegression().fit(confounders, t_values).predict(confounders)

    # Stage 2: regress the outcome on treatment plus the predicted score.
    design = pd.DataFrame({"Treatment": t_values, "PS": score})
    return LinearRegression().fit(design, y_values).coef_[0]
class CausalAccessor(object):
    """Causal helper wrapping a pandas object.

    Keeps the wrapped object together with a lazily built ``CausalModel``, the
    identified estimand, the sampler and the sampling method name, so that
    stateful calls to :meth:`do` can reuse them.
    """

    def __init__(self, pandas_obj):
        """Store ``pandas_obj`` and start with no cached model, estimand or sampler."""
        self._obj = pandas_obj
        self._causal_model = None
        self._sampler = None
        self._identified_estimand = None
        self._method = None

    def reset(self):
        """Drop the cached model, estimand, sampler and method name."""
        self._causal_model = None
        self._identified_estimand = None
        self._sampler = None
        self._method = None

    def do(self, x, method='weighting', num_cores=1, variable_types=None, outcome=None, params=None, dot_graph=None,
           common_causes=None, instruments=None, estimand_type='ate', proceed_when_unidentifiable=False,
           stateful=False):
        """Sample from the interventional distribution using a do-sampler.

        Cached state is dropped unless ``stateful`` is true and ``method``
        matches the previous call. A ``CausalModel`` and a sampler are built
        when none is cached.

        :param x: str, list or dict describing the intervention (see
            :meth:`parse_x`); only its first key is passed to ``CausalModel``
            as the treatment.
        :param method: sampler name; the class is looked up as ``method + "_sampler"``.
        :param num_cores: forwarded to the sampler.
        :param variable_types: dict forwarded to the sampler; ``None`` means an empty dict.
        :param outcome: outcome passed to ``CausalModel``.
        :param params: forwarded to the sampler.
        :param dot_graph: passed to ``CausalModel`` as ``graph``.
        :param common_causes: passed to ``CausalModel``.
        :param instruments: passed to ``CausalModel``.
        :param estimand_type: passed to ``CausalModel``.
        :param proceed_when_unidentifiable: passed to ``CausalModel``.
        :param stateful: keep the model and sampler on the accessor after the call.
        :return: whatever the sampler's ``do_sample(x)`` returns.
        """
        # A fresh dict per call: a `{}` default would be shared by every call.
        if variable_types is None:
            variable_types = {}
        x, keep_original_treatment = self.parse_x(x)
        if not stateful or method != self._method:
            self.reset()
        if not self._causal_model:
            self._causal_model = CausalModel(self._obj,
                                             list(x)[0],
                                             outcome,
                                             graph=dot_graph,
                                             common_causes=common_causes,
                                             instruments=instruments,
                                             estimand_type=estimand_type,
                                             proceed_when_unidentifiable=proceed_when_unidentifiable)
        self._identified_estimand = self._causal_model.identify_effect()
        if not self._sampler:
            self._method = method
            do_sampler_class = do_samplers.get_class_object(method + "_sampler")
            self._sampler = do_sampler_class(self._obj,
                                             self._identified_estimand,
                                             self._causal_model._treatment,
                                             self._causal_model._outcome,
                                             params=params,
                                             variable_types=variable_types,
                                             num_cores=num_cores,
                                             causal_model=self._causal_model,
                                             keep_original_treatment=keep_original_treatment)
        result = self._sampler.do_sample(x)
        if not stateful:
            self.reset()
        return result

    def parse_x(self, x):
        """Normalise ``x`` to ``(dict, keep_original_treatment)``.

        A str or list names variables without values, so each maps to ``None``
        and the flag is ``True``. A dict is returned as-is with the flag
        ``False``. Subclasses of these types are accepted.

        :raises Exception: if ``x`` is not a str, list or dict.
        """
        if isinstance(x, str):
            return {x: None}, True
        if isinstance(x, list):
            return {xi: None for xi in x}, True
        if isinstance(x, dict):
            return x, False
        raise Exception('x format not recognized: {}'.format(type(x)))
Пример #8
0
 def custom_data_average_treatment_effect_test(self, data):
     """Assert that ``self._Estimator`` recovers the known ATE of a caller-supplied dataset.

     :param data: mapping providing ``"df"``, ``"treatment_name"``,
         ``"outcome_name"``, ``"gml_graph"`` and the true effect under ``"ate"``.
     :raises AssertionError: if the absolute error is not below
         ``abs(true_ate) * self._error_tolerance``.
     """
     model = CausalModel(data=data['df'],
                         treatment=data["treatment_name"],
                         outcome=data["outcome_name"],
                         graph=data["gml_graph"],
                         proceed_when_unidentifiable=True,
                         test_significance=None)
     target_estimand = model.identify_effect()
     estimator_ate = self._Estimator(data['df'],
                                     identified_estimand=target_estimand,
                                     treatment=data["treatment_name"],
                                     outcome=data["outcome_name"],
                                     test_significance=None)
     true_ate = data["ate"]
     ate_estimate = estimator_ate.estimate_effect()
     # The signed error is kept for the report; the check below uses its magnitude.
     error = ate_estimate.value - true_ate
     print(
         "Error in ATE estimate = {0} with tolerance {1}%. Estimated={2},True={3}"
         .format(error, self._error_tolerance * 100, ate_estimate.value,
                 true_ate))
     # Compare magnitudes: a signed comparison accepts every under-estimate
     # and is meaningless when the true ATE is negative.
     assert abs(error) < abs(true_ate) * self._error_tolerance
Пример #9
0
def register_graph():
    """Estimate a causal effect from request parameters and return it as JSON.

    Reads ``digraph``, ``dataset``, ``treatment`` and ``outcome`` from
    ``request.args``, loads the named dataiku dataset as a dataframe, and runs
    identification plus a backdoor linear-regression estimate.

    :return: JSON string ``{"results": <str of the estimate>}``.
    """
    params = request.args
    graph_spec = params.get('digraph')
    dataset_name = params.get('dataset')
    treatment = params.get('treatment')
    outcome = params.get('outcome')

    frame = dataiku.Dataset(dataset_name).get_dataframe()
    causal_model = CausalModel(
        data=frame,
        treatment=treatment,
        outcome=outcome,
        graph=graph_spec,
    )

    estimand = causal_model.identify_effect()
    regression_estimate = causal_model.estimate_effect(
        estimand,
        method_name="backdoor.linear_regression",
        test_significance=True)

    return json.dumps({'results': str(regression_estimate)})
Пример #10
0
    def null_refutation_test(self,
                             data=None,
                             dataset="linear",
                             beta=10,
                             num_common_causes=1,
                             num_instruments=1,
                             num_samples=100000,
                             treatment_is_binary=True):
        """Check that a refuter with zero effect strengths leaves the estimate unchanged.

        The refuter is run twice: once with the strengths configured on
        ``self`` (only to surface exceptions) and once with both strengths set
        to zero. The zero-strength result must stay within
        ``self._error_tolerance`` (a fraction) of the original estimate.

        :param data: optional dataset mapping; generated with
            ``dowhy.datasets.linear_dataset`` when ``None``.
        :param dataset: not used by this method.
        :param beta: forwarded to ``linear_dataset`` when ``data`` is ``None``.
        :param num_common_causes: forwarded to ``linear_dataset``.
        :param num_instruments: forwarded to ``linear_dataset``.
        :param num_samples: forwarded to ``linear_dataset``.
        :param treatment_is_binary: forwarded to ``linear_dataset``.
        :raises AssertionError: if the refuted effect deviates too much.
        """
        # Fall back to a generated dataset when the caller supplies none.
        if data is None:
            data = dowhy.datasets.linear_dataset(
                beta=beta,
                num_common_causes=num_common_causes,
                num_instruments=num_instruments,
                num_samples=num_samples,
                treatment_is_binary=treatment_is_binary)

        causal_model = CausalModel(
            data=data['df'],
            treatment=data["treatment_name"],
            outcome=data["outcome_name"],
            graph=data["gml_graph"],
            proceed_when_unidentifiable=True,
            test_significance=None)
        estimand = causal_model.identify_effect()
        original_estimate = causal_model.estimate_effect(
            identified_estimand=estimand,
            method_name=self.estimator_method,
            test_significance=None)
        true_ate = data["ate"]  # read here but not used by the checks below

        # Smoke run with the configured strengths: only exceptions matter.
        causal_model.refute_estimate(
            estimand,
            original_estimate,
            method_name=self.refuter_method,
            confounders_effect_on_treatment=self.confounders_effect_on_t,
            confounders_effect_on_outcome=self.confounders_effect_on_y,
            effect_strength_on_treatment=self.effect_strength_on_t,
            effect_strength_on_outcome=self.effect_strength_on_y)

        # Zero-strength run: the estimate is expected to stay the same.
        null_refutation = causal_model.refute_estimate(
            estimand,
            original_estimate,
            method_name=self.refuter_method,
            confounders_effect_on_treatment=self.confounders_effect_on_t,
            confounders_effect_on_outcome=self.confounders_effect_on_y,
            effect_strength_on_treatment=0,
            effect_strength_on_outcome=0)

        deviation = abs(null_refutation.new_effect - original_estimate.value)
        report = "Error in refuted estimate = {0} with tolerance {1}%. Estimated={2},After Refutation={3}"
        print(report.format(deviation,
                            self._error_tolerance * 100,
                            original_estimate.value,
                            null_refutation.new_effect))
        assert deviation < abs(original_estimate.value) * self._error_tolerance
Пример #11
0
 def do(self,
        x,
        method=None,
        num_cores=1,
        variable_types=None,
        outcome=None,
        params=None,
        dot_graph=None,
        common_causes=None,
        instruments=None,
        estimand_type='ate',
        proceed_when_unidentifiable=False,
        keep_original_treatment=False,
        use_previous_sampler=False):
     """Sample from the interventional distribution using a do-sampler.

     The causal model and sampler are stored on ``self._obj`` and reused only
     when ``use_previous_sampler`` is true and they already exist.

     :param x: mapping describing the intervention; its first key is passed
         to ``CausalModel`` as the treatment.
     :param method: sampler name (required); the class is looked up as
         ``method + "_sampler"``.
     :param num_cores: forwarded to the sampler.
     :param variable_types: dict forwarded to the sampler; ``None`` means an empty dict.
     :param outcome: outcome passed to ``CausalModel``.
     :param params: forwarded to the sampler.
     :param dot_graph: passed to ``CausalModel`` as ``graph``.
     :param common_causes: passed to ``CausalModel``.
     :param instruments: passed to ``CausalModel``.
     :param estimand_type: passed to ``CausalModel``.
     :param proceed_when_unidentifiable: passed to ``CausalModel``.
     :param keep_original_treatment: forwarded to the sampler.
     :param use_previous_sampler: reuse the model and sampler cached on ``self._obj``.
     :return: whatever the sampler's ``do_sample(x)`` returns.
     :raises Exception: if ``method`` is not given.
     """
     if not method:
         raise Exception("You must specify a do sampling method.")
     # A fresh dict per call: a `{}` default would be shared by every call.
     if variable_types is None:
         variable_types = {}
     # NOTE(review): assumes `_causal_model` / `_sampler` already exist as
     # attributes on the wrapped object — confirm where they are initialised.
     if not self._obj._causal_model or not use_previous_sampler:
         self._obj._causal_model = CausalModel(
             self._obj, list(x.keys())[0],
             outcome,
             graph=dot_graph,
             common_causes=common_causes,
             instruments=instruments,
             estimand_type=estimand_type,
             proceed_when_unidentifiable=proceed_when_unidentifiable)
     self._obj._identified_estimand = self._obj._causal_model.identify_effect(
     )
     do_sampler_class = do_samplers.get_class_object(method + "_sampler")
     if not self._obj._sampler or not use_previous_sampler:
         self._obj._sampler = do_sampler_class(
             self._obj,
             self._obj._identified_estimand,
             self._obj._causal_model._treatment,
             self._obj._causal_model._outcome,
             params=params,
             variable_types=variable_types,
             num_cores=num_cores,
             causal_model=self._obj._causal_model,
             keep_original_treatment=keep_original_treatment)
     return self._obj._sampler.do_sample(x)
Пример #12
0
import dowhy.datasets






# Toy frame: X and Y are deterministic ranges, Z is a shuffled 0..9.
z= [i for i in range(10)]

random.shuffle(z)

df = pd.DataFrame(data = {'Z': z, 'X': range(0,10), 'Y': range(0,100,10)})


# Bare expression: has no effect when run as a script (notebook-style echo).
df


# NOTE(review): machine-specific absolute path, and the name `dir` shadows the builtin.
dir = "C:\\Users\\T149900\\source\\repos\\PythonApplication2\\PythonApplication2\\"

# With GML file
model = CausalModel(data = df, treatment='X', outcome='Y', graph= dir + "test.gml")




# Bare expression: has no effect when run as a script (notebook-style echo).
model



Пример #13
0
import dowhy
# from dowhy import CausalModel
from IPython.display import Image, display

# I. Generating dummy data
# We generate some dummy data for three variables: X, Y and Z.
from dowhy.do_why import CausalModel

# Z is a shuffled 0..9; X and Y are deterministic ranges.
z = [i for i in range(10)]
random.shuffle(z)
df = pd.DataFrame(data={'Z': z, 'X': range(0, 10), 'Y': range(0, 100, 10)})
print(df)

# II. Loading GML or DOT graphs
# GML format

# With GML string
# The graph declares the edges Z -> X, Z -> Y and X -> Y.
model = CausalModel(data=df,
                    treatment='X',
                    outcome='Y',
                    graph="""graph[directed 1 node[id "Z" label "Z"]  
                    node[id "X" label "X"]
                    node[id "Y" label "Y"]      
                    edge[source "Z" target "X"]    
                    edge[source "Z" target "Y"]     
                    edge[source "X" target "Y"]]""")
model.view_model()

# presumably view_model() wrote this image file — confirm the filename against dowhy.
display(Image(filename="causal_model_simple_example.png"))
Пример #14
0
# Name the 25 covariate columns x1..x25 (appended to the existing `col` list).
for i in range(1, 26):
    col.append("x" + str(i))
data.columns = col
data.head()
print(data)

# Model
# Create a causal model from the data and given common causes.
xs = ""
for i in range(1, 26):
    xs += ("x" + str(i) + "+")

model = CausalModel(
    data=data,
    treatment='treatment',
    outcome='y_factual',
    # `xs` ends with "+", so a plain split leaves a trailing empty name; drop it.
    common_causes=[name for name in xs.split('+') if name]
)

# Identify
# Identify the causal effect
identified_estimand = model.identify_effect()

# Estimate (using different methods)

# 3.1 Using Linear Regression
# Estimate the causal effect and compare it with Average Treatment Effect
estimate = model.estimate_effect(identified_estimand,
                                 method_name="backdoor.linear_regression", test_significance=True
                                 )
import dowhy
from dowhy.do_why import CausalModel
import dowhy.datasets

if __name__ == "__main__":
    # Synthetic linear dataset; `data` is read below via the keys "df",
    # "treatment_name", "outcome_name" and "dot_graph".
    data = dowhy.datasets.linear_dataset(beta=10,
                                         num_common_causes=5,
                                         num_instruments=2,
                                         num_samples=10000,
                                         treatment_is_binary=True)
    # Create a causal model from the data and given graph.
    model = CausalModel(
        data=data["df"],
        treatment=data["treatment_name"],
        outcome=data["outcome_name"],
        graph=data["dot_graph"])

    # Identify the estimand, then estimate it with backdoor linear regression.
    identified_estimand = model.identify_effect()

    estimate = model.estimate_effect(
        identified_estimand,
        method_name="backdoor.linear_regression")
    print("Causal Estimate is " + str(estimate.value))

    # Adding a random common cause variable
    res_random = model.refute_estimate(
        identified_estimand, estimate,
        method_name="random_common_cause")
    print(res_random)

    # Replacing treatment with a random (placebo) variable
Пример #16
0
from dowhy.do_why import CausalModel

# Synthetic linear dataset; `data` is read below via the keys "df", "dot_graph",
# "gml_graph", "treatment_name" and "outcome_name".
data = dowhy.datasets.linear_dataset(beta=10,
                                     num_common_causes=5,
                                     num_instruments=2,
                                     num_samples=10000,
                                     treatment_is_binary=True)
df = data["df"]
print(df.head())
print(data["dot_graph"])
print("\n")
print(data["gml_graph"])

# With graph
model = CausalModel(data=df,
                    treatment=data["treatment_name"],
                    outcome=data["outcome_name"],
                    graph=data["gml_graph"])

model.view_model()

from IPython.display import Image, display

# presumably view_model() wrote this image file — confirm the filename against dowhy.
display(Image(filename="causal_model_simple_example.png"))

# DoWhy philosophy: Keep identification and estimation separate
# Identification can be achieved without access to the data, accessing only the graph.
# This results in an expression to be computed.
# This expression can then be evaluated using the available data in the estimation step.
# It is important to understand that these are orthogonal steps.

identified_estimand = model.identify_effect()
Пример #17
0
class DoWhyExample:
    """Worked DoWhy example: effect of ``TOJ`` on ``IntCur`` on a small hand-made table.

    The model, identified estimand and estimate are built lazily and cached in
    ``t_model``, ``t_identify`` and ``t_estimate``. Pass ``force_again=True``
    to rebuild them.
    """

    # Synthetic dataset generated when the class body runs; it is not
    # referenced by the methods of this class.
    data_old = ds.linear_dataset(beta=10,
                                 num_common_causes=5,
                                 num_instruments=5,
                                 num_samples=10000,
                                 treatment_is_binary=True)

    # Base GML graph: TOJ -> IntCur, with U pointing at both. The closing
    # bracket is appended after the extra nodes and edges are added below.
    gml_graph = ('graph[directed 1'
                 'node[ id "TOJ" label "TOJ"]'
                 'node[ id "IntCur" label "IntCur"]'
                 'node[ id "U" label "Unobserved Confounders"]'
                 'edge[source "TOJ" target "IntCur"]'
                 'edge[source "U" target "TOJ"]'
                 'edge[source "U" target "IntCur"]')

    # add_node / connect_node are defined elsewhere; presumably they append a
    # node (plus an edge to the last argument) or an edge to the GML text — confirm.
    gml_graph = add_node(gml_graph, "YeshivaAdults", "IntCur")
    gml_graph = add_node(gml_graph, "Sex", "IntCur")
    gml_graph = add_node(gml_graph, "Age", "IntCur")
    gml_graph = connect_node(gml_graph, "Age", "TOJ")
    gml_graph = connect_node(gml_graph, "Age", "YeshivaAdults")
    gml_graph = connect_node(gml_graph, "Sex", "YeshivaAdults")
    gml_graph = connect_node(gml_graph, "TOJ", "YeshivaAdults")
    gml_graph = gml_graph + ']'
    # table
    # ID    Age     Sex     TOJ (Orthodox)? (Treatment?)     Yeshiva?    Intell. Curios? (Outcome)

    data = pd.DataFrame(
        np.array([[30.0, 1.0, 1.0, 1.0, 0.0], [40.0, 1.0, 0.0, 0.0, 1.0]]),
        columns=['Age', 'Sex', 'TOJ', 'YeshivaAdults', 'IntCur'])

    # Lazily filled caches; assigning through `self` stores them per instance.
    t_model = None
    t_identify = None
    t_estimate = None

    def model(self, force_again=False):
        """Return the cached ``CausalModel``, building it when absent or forced."""
        if self.t_model is None or force_again:
            self.t_model = CausalModel(data=self.data,
                                       treatment='TOJ',
                                       outcome='IntCur',
                                       graph=self.gml_graph)

        return self.t_model

    def identify(self, force_again=False):
        """Return the cached identified estimand, (re)building the model first if needed."""
        if self.t_identify is None or force_again:
            if self.t_model is None or force_again:
                self.model(force_again=force_again)
            self.t_identify = self.t_model.identify_effect()
        return self.t_identify

    def estimate(self,
                 method_name="backdoor.propensity_score_matching",
                 force_again=False):
        """Return the cached estimate, computing it with ``method_name`` when absent or forced.

        A cached estimate is returned as-is even if ``method_name`` differs
        from the one used to compute it.
        """
        if self.t_estimate is None or force_again:
            # Resolve the estimand first: identify() builds or rebuilds the
            # model, so the model must only be looked up afterwards.
            estimand = self.identify(force_again)
            self.t_estimate = self.model().estimate_effect(
                estimand, method_name)
        return self.t_estimate

    def refute(self, method_name="random_common_cause", force_again=False):
        """Run the refuter ``method_name`` against the current estimate and return its result."""
        # Force at most one rebuild, through estimate(). Forcing each step
        # separately would rebuild the model three times and tie the model,
        # estimand and estimate to different model instances.
        estimate = self.estimate(force_again=force_again)
        return self.model().refute_estimate(
            self.identify(),
            estimate,
            method_name=method_name)
Пример #18
0
class CausalAccessor(object):
    def __init__(self, pandas_obj):
        """
        An accessor for the pandas.DataFrame under the `causal` namespace.

        :param pandas_obj:
        """
        self._obj = pandas_obj
        self._causal_model = None
        self._sampler = None
        self._identified_estimand = None
        self._method = None

    def reset(self):
        """
        If a `causal` namespace method (especially `do`) was run statefully, this resets the namespace.

        :return:
        """
        self._causal_model = None
        self._identified_estimand = None
        self._sampler = None
        self._method = None

    def do(self,
           x,
           method='weighting',
           num_cores=1,
           variable_types=None,
           outcome=None,
           params=None,
           dot_graph=None,
           common_causes=None,
           estimand_type='ate',
           proceed_when_unidentifiable=False,
           stateful=False):
        """
        The do-operation implemented with sampling. This will return a pandas.DataFrame with the outcome
        variable(s) replaced with samples from P(Y|do(X=x)).

        If the value of `x` is left unspecified (e.g. as a string or list), then the original values of `x` are left in
        the DataFrame, and Y is sampled from its respective P(Y|do(x)). If the value of `x` is specified (passed with a
        `dict`, where variable names are keys, and values are specified) then the new `DataFrame` will contain the
        specified values of `x`.

        For some methods, the `variable_types` field must be specified. It should be a `dict`, where the keys are
        variable names, and values are 'o' for ordered discrete, 'u' for un-ordered discrete, 'd' for discrete, or 'c'
        for continuous.

        Inference requires a set of control variables. These can be provided explicitly using `common_causes`, which
        contains a list of variable names to control for. These can be provided implicitly by specifying a causal graph
        with `dot_graph`, from which they will be chosen using the default identification method.

        When the set of control variables can't be identified with the provided assumptions, a prompt will raise to the
        user asking whether to proceed. To automatically over-ride the prompt, you can set the flag
        `proceed_when_unidentifiable` to `True`.

        Some methods build components during inference which are expensive. To retain those components for later
        inference (e.g. successive calls to `do` with different values of `x`), you can set the `stateful` flag to `True`.
        Be cautious about using the `do` operation statefully. State is set on the namespace, rather than the method, so
        can behave unpredictably. To reset the namespace and run statelessly again, you can call the `reset` method.

        :param x: str, list, dict: The causal state on which to intervene, and (optional) its interventional value(s).
        :param method: The inference method to use with the sampler. Currently, `'mcmc'`, `'weighting'`, and
        `'kernel_density'` are supported.
        :param num_cores: int: if the inference method only supports sampling a point at a time, this will parallelize
        sampling.
        :param variable_types: dict: The dictionary containing the variable types. Must contain the union of the causal
        state, control variables, and the outcome. `None` (the default) is treated as an empty dict.
        :param outcome: str: The outcome variable.
        :param params: dict: extra parameters to set as attributes on the sampler object
        :param dot_graph: str: A string specifying the causal graph.
        :param common_causes: list: A list of strings containing the variable names to control for.
        :param estimand_type: str: 'ate' is the only one currently supported. Others may be added later, to allow for
        CATE estimation.
        :param proceed_when_unidentifiable: bool: A flag to over-ride user prompts to proceed when effects aren't
        identifiable with the assumptions provided.
        :param stateful: bool: Whether to retain state. By default, the do operation is stateless.
        :return: pandas.DataFrame: A DataFrame containing the sampled outcome
        """
        # A fresh dict per call: a `{}` default would be shared by every call.
        if variable_types is None:
            variable_types = {}
        x, keep_original_treatment = self.parse_x(x)
        if not stateful or method != self._method:
            self.reset()
        if not self._causal_model:
            # Only the first key of `x` is passed on as the treatment.
            self._causal_model = CausalModel(
                self._obj, list(x)[0],
                outcome,
                graph=dot_graph,
                common_causes=common_causes,
                instruments=None,
                estimand_type=estimand_type,
                proceed_when_unidentifiable=proceed_when_unidentifiable)
        self._identified_estimand = self._causal_model.identify_effect()
        if not self._sampler:
            self._method = method
            do_sampler_class = do_samplers.get_class_object(method +
                                                            "_sampler")
            self._sampler = do_sampler_class(
                self._obj,
                self._identified_estimand,
                self._causal_model._treatment,
                self._causal_model._outcome,
                params=params,
                variable_types=variable_types,
                num_cores=num_cores,
                causal_model=self._causal_model,
                keep_original_treatment=keep_original_treatment)
        result = self._sampler.do_sample(x)
        if not stateful:
            self.reset()
        return result

    def parse_x(self, x):
        """
        Normalise `x` to a `(dict, keep_original_treatment)` pair.

        A str or list names variables without values, so each maps to `None` and the flag is `True`. A dict is
        returned as-is with the flag `False`. Subclasses of these types are accepted.

        :param x: str, list, dict: The causal state on which to intervene.
        :return: tuple: the normalised dict and the `keep_original_treatment` flag.
        :raises Exception: if `x` is not a str, list or dict.
        """
        if isinstance(x, str):
            return {x: None}, True
        if isinstance(x, list):
            return {xi: None for xi in x}, True
        if isinstance(x, dict):
            return x, False
        raise Exception('x format not recognized: {}'.format(type(x)))
Пример #19
0
import dowhy
from dowhy.do_why import CausalModel
import dowhy.datasets
# Module names are case-sensitive: the PyGraphviz package is imported as `pygraphviz`.
import pygraphviz

# Load some sample data
data = dowhy.datasets.linear_dataset(beta=10,
                                     num_common_causes=5,
                                     num_instruments=2,
                                     num_samples=10000,
                                     treatment_is_binary=True)

# Create a causal model from the data and given graph.
model = CausalModel(
    data=data["df"],
    treatment=data["treatment_name"],
    outcome=data["outcome_name"],
    graph=data["dot_graph"],
)

# Identify causal effect and return target estimands
identified_estimand = model.identify_effect()

# Estimate the target estimand using a statistical method.
estimate = model.estimate_effect(
    identified_estimand, method_name="backdoor.propensity_score_matching")

# Refute the obtained estimate using multiple robustness checks.
refute_results = model.refute_estimate(identified_estimand,
                                       estimate,
                                       method_name="random_common_cause")
        method_name="iv.regression_discontinuity",
        method_params={
            'rd_variable_name': 'Z1',
            'rd_threshold_value': 0.5,
            'rd_bandwidth': 0.1
        })

    print(causal_estimate_regdist)
    print("Causal Estimate is " + str(causal_estimate_regdist.value))


if __name__ == "__main__":
    # Synthetic linear dataset; `data` is read below via the keys "df",
    # "treatment_name", "outcome_name", "dot_graph" and "instrument_names".
    data = dowhy.datasets.linear_dataset(beta=10,
                                         num_common_causes=5,
                                         num_instruments=2,
                                         num_samples=10000,
                                         treatment_is_binary=True)

    # With graph
    model = CausalModel(data=data['df'],
                        treatment=data["treatment_name"],
                        outcome=data["outcome_name"],
                        graph=data["dot_graph"],
                        instruments=data["instrument_names"],
                        logging_level=logging.INFO)

    model.view_model()
    identified_estimand = model.identify_effect()
    print(identified_estimand)
    # `regression` is defined elsewhere in this script; it receives the model
    # and the identified estimand.
    regression(model, identified_estimand)
Пример #21
0
import numpy as np
import pandas as pd

import dowhy
from dowhy.do_why import CausalModel
import dowhy.datasets

# Synthetic linear dataset; `data` is read below via the keys "df", "dot_graph",
# "gml_graph", "treatment_name" and "outcome_name".
data = dowhy.datasets.linear_dataset(beta=10,
                                     num_common_causes=5,
                                     num_instruments=2,
                                     num_samples=10000,
                                     treatment_is_binary=True)
df = data["df"]
print(df.head())
print(data["dot_graph"])
print("\n")
print(data["gml_graph"])

# With graph
model = CausalModel(data=df,
                    treatment=data["treatment_name"],
                    outcome=data["outcome_name"],
                    graph=data["gml_graph"])

# Identification: derive the estimand from the model.
identified_estimand = model.identify_effect()
print(identified_estimand)

# Estimation: evaluate the estimand with backdoor linear regression.
causal_estimate = model.estimate_effect(
    identified_estimand, method_name="backdoor.linear_regression")
print(causal_estimate)
print("Causal Estimate is " + str(causal_estimate.value))