def _estimate_effect(self):
        """Estimate the effect by chaining two backdoor-style estimators.

        Stage one estimates the effect of the treatment on the intermediate
        variables (the front-door variables or the mediators, depending on
        the target estimand's ``identifier_method``). Stage two estimates the
        effect of those intermediate variables on the outcome. The two stage
        values are multiplied together. When the target estimand type is
        ``CausalIdentifier.NONPARAMETRIC_NIE`` a third, total-effect model is
        fitted and the product is subtracted from its value.

        As a side effect ``self.symbolic_estimator`` is (re)assigned.

        NOTE(review): the product of the two stage effects is treated here as
        the natural *direct* effect and "total - product" as the natural
        *indirect* effect. In product-of-coefficients mediation analysis the
        product is usually the indirect effect -- confirm these labels against
        ``construct_symbolic_estimator`` and the callers before relying on them.

        :returns: a ``CausalEstimate`` holding the combined value, the
            original target estimand and the symbolic estimator expression.
        """
        # Sentinel: identifier_method is neither "frontdoor" nor "mediation".
        no_intermediate = object()

        def intermediate_variables():
            # Parsed names of the variables sitting between treatment and outcome.
            method = self._target_estimand.identifier_method
            if method == "frontdoor":
                return parse_state(self._frontdoor_variables_names)
            if method == "mediation":
                return parse_state(self._mediators_names)
            return no_intermediate

        def fit_stage(model_cls, estimand, treatment, outcome):
            # Every stage shares the same estimator configuration.
            stage_estimator = model_cls(
                self._data,
                estimand,
                treatment,
                outcome,
                control_value=self._control_value,
                treatment_value=self._treatment_value,
                test_significance=self._significance_test,
                evaluate_effect_strength=self._effect_strength_eval,
                confidence_intervals=self._confidence_intervals,
                target_units=self._target_units,
                effect_modifiers=self._effect_modifier_names,
                params=self.method_params)
            return stage_estimator._estimate_effect()

        # First stage: treatment -> intermediate variables.
        stage_one_estimand = copy.deepcopy(self._target_estimand)
        stage_one_estimand.identifier_method = "backdoor"
        stage_one_estimand.backdoor_variables = (
            self._target_estimand.mediation_first_stage_confounders)
        stage_one_outcome = intermediate_variables()
        if stage_one_outcome is not no_intermediate:
            stage_one_estimand.outcome_variable = stage_one_outcome
        first_stage_estimate = fit_stage(
            self.first_stage_model,
            stage_one_estimand,
            self._treatment_name,
            parse_state(stage_one_estimand.outcome_variable))

        # Second stage: intermediate variables -> outcome.
        stage_two_estimand = copy.deepcopy(self._target_estimand)
        stage_two_estimand.identifier_method = "backdoor"
        stage_two_estimand.backdoor_variables = (
            self._target_estimand.mediation_second_stage_confounders)
        stage_two_treatment = intermediate_variables()
        if stage_two_treatment is not no_intermediate:
            stage_two_estimand.treatment_variable = stage_two_treatment
        second_stage_estimate = fit_stage(
            self.second_stage_model,
            stage_two_estimand,
            parse_state(stage_two_estimand.treatment_variable),
            self._outcome_name)

        # Combine the two stages by multiplying their effects.
        natural_direct_effect = (
            first_stage_estimate.value * second_stage_estimate.value)
        estimate_value = natural_direct_effect
        self.symbolic_estimator = self.construct_symbolic_estimator(
            first_stage_estimate.realized_estimand_expr,
            second_stage_estimate.realized_estimand_expr,
            estimand_type=CausalIdentifier.NONPARAMETRIC_NDE)

        requested_type = self._target_estimand.estimand_type
        if requested_type == CausalIdentifier.NONPARAMETRIC_NIE:
            # Total effect of the treatment on the outcome; the second-stage
            # model class is reused for this fit.
            total_effect_estimand = copy.deepcopy(self._target_estimand)
            total_effect_estimand.identifier_method = "backdoor"
            total_effect_estimate = fit_stage(
                self.second_stage_model,
                total_effect_estimand,
                self._treatment_name,
                self._outcome_name)

            natural_indirect_effect = (
                total_effect_estimate.value - natural_direct_effect)
            estimate_value = natural_indirect_effect
            self.symbolic_estimator = self.construct_symbolic_estimator(
                first_stage_estimate.realized_estimand_expr,
                second_stage_estimate.realized_estimand_expr,
                total_effect_estimate.realized_estimand_expr,
                estimand_type=self._target_estimand.estimand_type)

        return CausalEstimate(estimate=estimate_value,
                              target_estimand=self._target_estimand,
                              realized_estimand_expr=self.symbolic_estimator)
Exemplo n.º 2
0
    def _estimate_effect(self, recalculate_propensity_score=False):
        """Estimate the effect by weighting outcomes with propensity scores.

        A logistic regression of the treatment on the observed common causes
        is fitted when no propensity model is cached yet, or when
        ``recalculate_propensity_score`` is ``True``; its predicted
        probabilities are stored in ``self._data['ps']``. The scores are then
        clamped to ``[self.min_ps_score, self.max_ps_score]``.

        Nine weight columns are written to ``self._data``: plain, normalized
        and stabilized variants, each for the "ate", "att" ("t" prefix) and
        "atc" ("c" prefix) target units. The column actually used is picked
        from ``self.weighting_scheme`` and ``self._target_units``; the
        estimate is the weighted outcome sum over treated rows minus the
        weighted outcome sum over untreated rows. The helper columns ``d_y``
        and ``dbar_y`` are also written to ``self._data``.

        :param recalculate_propensity_score: refit the propensity model even
            if one is already cached.
        :returns: a ``CausalEstimate`` carrying the estimate and the clamped
            propensity scores.
        :raises ValueError: if ``self._target_units`` is not "ate", "att" or
            "atc".
        """
        needs_fit = (self._propensity_score_model is None
                     or recalculate_propensity_score is True)
        if needs_fit:
            model = linear_model.LogisticRegression()
            self._propensity_score_model = model
            model.fit(self._observed_common_causes, self._treatment)
            self._data['ps'] = model.predict_proba(
                self._observed_common_causes)[:, 1]

        # Clamp the scores: cap from above first, then from below.
        self._data['ps'] = np.minimum(self.max_ps_score, self._data['ps'])
        self._data['ps'] = np.maximum(self.min_ps_score, self._data['ps'])

        data = self._data
        treated = data[self._treatment_name[0]]   # treatment indicator column
        control = 1 - treated                     # complement of the indicator
        ps = data['ps']
        one_minus_ps = 1 - ps

        # Recurring building blocks of every weighting scheme below.
        ipw_treated = treated / ps                     # T / ps
        ipw_control = control / one_minus_ps           # (1-T) / (1-ps)
        odds_control = control * ps / one_minus_ps     # (1-T) * ps / (1-ps)
        inv_odds_treated = treated * one_minus_ps / ps  # T * (1-ps) / ps

        ipst_sum = sum(ipw_treated)
        ipsc_sum = sum(ipw_control)
        num_units = len(treated)
        num_treatment_units = sum(treated)
        num_control_units = num_units - num_treatment_units

        # Vanilla IPS estimator.
        data['ips_weight'] = (1 / num_units) * (ipw_treated + ipw_control)
        data['tips_weight'] = (1 / num_treatment_units) * (
            treated + odds_control)
        data['cips_weight'] = (1 / num_control_units) * (
            inv_odds_treated + control)

        # Self-normalized weights (also known as the Hajek estimator).
        data['ips_normalized_weight'] = (
            ipw_treated / ipst_sum + ipw_control / ipsc_sum)
        ipst_for_att_sum = sum(treated)
        ipsc_for_att_sum = sum(ipw_control * ps)
        data['tips_normalized_weight'] = (
            treated / ipst_for_att_sum + odds_control / ipsc_for_att_sum)
        ipst_for_atc_sum = sum(ipw_treated * one_minus_ps)
        ipsc_for_atc_sum = sum(control)
        data['cips_normalized_weight'] = (
            inv_odds_treated / ipst_for_atc_sum + control / ipsc_for_atc_sum)

        # Stabilized weights: scaled by the marginal treatment probability.
        p_treatment = sum(treated) / num_units
        p_control = 1 - p_treatment
        data['ips_stabilized_weight'] = (1 / num_units) * (
            ipw_treated * p_treatment + ipw_control * p_control)
        data['tips_stabilized_weight'] = (1 / num_treatment_units) * (
            treated * p_treatment + odds_control * p_control)
        data['cips_stabilized_weight'] = (1 / num_control_units) * (
            inv_odds_treated * p_treatment + control * p_control)

        # The target units select the column prefix: none / "t" / "c".
        prefix_by_target = {"att": "t", "atc": "c"}
        target_units = self._target_units
        if target_units == "ate":
            weighting_scheme_name = self.weighting_scheme
        elif target_units in prefix_by_target:
            weighting_scheme_name = (
                prefix_by_target[target_units] + self.weighting_scheme)
        else:
            raise ValueError("Target units string value not supported")

        # Weighted outcome sums over treated and untreated rows.
        weights = data[weighting_scheme_name]
        outcome = data[self._outcome_name]
        data['d_y'] = weights * treated * outcome
        data['dbar_y'] = weights * control * outcome
        est = data['d_y'].sum() - data['dbar_y'].sum()

        # TODO - how can we add additional information into the returned estimate?
        return CausalEstimate(
            estimate=est,
            target_estimand=self._target_estimand,
            realized_estimand_expr=self.symbolic_estimator,
            propensity_scores=self._data["ps"])