def __init__(self, y, x, z, X, alpha, variable_types=None, burn=1000, thin=10, bins=None):
    """Fit the densities, draw a null sample and compute the chi2 values.

    Args:
        y, x, z: Lists of column names. They are concatenated with ``+``
            and the combined list is used to index ``X``.
        X: The data set. It must support ``len``, indexing by a list of
            columns and ``.median().values`` (presumably a pandas
            DataFrame -- confirm against callers).
        alpha: Stored on the instance as ``self.alpha``; not otherwise
            used in this initializer.
        variable_types: Mapping stored as ``self.variable_types``. When
            omitted, a fresh empty dict is created for this instance.
        burn, thin: Stored as ``self.burn`` / ``self.thin``.
        bins: Mapping stored as ``self.bins``. When omitted, a fresh empty
            dict is created for this instance.
    """
    # The defaults used to be literal ``{}`` objects in the signature, which
    # Python creates once and shares between every instance that relies on
    # them. Build a new dict per instance instead.
    self.variable_types = {} if variable_types is None else variable_types
    self.bins = {} if bins is None else bins
    self.alpha = alpha
    self.x = x
    self.y = y
    self.z = z

    # Efficient estimation with four jobs is used for more than 300 rows, or
    # when either variable group combined with z holds three or more names.
    if len(X) > 300 or max(len(x + z), len(y + z)) >= 3:
        self.defaults = EstimatorSettings(n_jobs=4, efficient=True)
    else:
        self.defaults = EstimatorSettings(n_jobs=-1, efficient=False)

    # NOTE(review): the helpers below are assumed to read the attributes
    # assigned before them, so the original statement order is kept.
    self.densities = self.estimate_densities(x, y, z, X)
    self.N = len(X)
    self.mcmc_initialization = X[x + y + z].median().values
    self.burn = burn
    self.thin = thin
    self.null_df = self.generate_ci_sample()

    # The bound is the third item of the result on the null sample; the
    # observed value is the second item of the result on the real data.
    _, _, self.chi2_bound = self.discretize_and_get_chi2(self.null_df)
    self.chi2 = self.discretize_and_get_chi2(X)[1]
def __init__(self, *args, **kwargs):
    """Initialise the parent, then choose KDE settings and build the sampler.

    All positional and keyword arguments are forwarded unchanged to the
    parent initializer.
    """
    super().__init__(*args, **kwargs)
    self.logger.info("Using KernelDensitySampler for do sampling.")

    # Efficient estimation is used for more than 300 rows, or when the
    # treatment or outcome names combined with the backdoor variables
    # reach three entries.
    if len(self._data) > 300:
        efficient = True
    else:
        backdoor = self._target_estimand.backdoor_variables
        widest = max(
            len(self._treatment_names + backdoor),
            len(self._outcome_names + backdoor),
        )
        efficient = widest >= 3
    self.defaults = EstimatorSettings(
        n_jobs=4 if efficient else -1,
        efficient=efficient,
    )

    # Cross-validated bandwidth only when no variable is typed 'c'.
    has_continuous = 'c' in self._variable_types.values()
    self.bw = 'normal_reference' if has_continuous else 'cv_ml'

    self.sampler = self._construct_sampler()
def continuous_treatment_model(data, covariates, treatment, variable_types):
    """Evaluate a conditional KDE of the treatment given the covariates.

    Args:
        data: Table indexed by column name(s); passed through
            ``binarize_discrete`` first, which returns the data and
            covariates actually used below.
        covariates: Names of the conditioning columns.
        treatment: Name of the treatment column. It is used as a single
            column (``data[treatment]`` and ``[treatment]``).
        variable_types: Mapping whose values are type codes; looked up via
            ``get_type_string``.

    Returns:
        The result of ``model.pdf`` evaluated at the same treatment and
        covariate values the model was fitted on.
    """
    data, covariates = binarize_discrete(data, covariates, variable_types)

    if len(data) > 300:
        efficient = True
    else:
        # The original check was ``len(treatment + covariates)``. With a
        # string column name and a list of covariates that raises TypeError,
        # so small data sets could never be fitted. Count the variables
        # without concatenating; a list/tuple treatment keeps its old count.
        if isinstance(treatment, (list, tuple)):
            n_treatment = len(treatment)
        else:
            n_treatment = 1
        efficient = n_treatment + len(covariates) >= 3

    if efficient:
        defaults = EstimatorSettings(n_jobs=4, efficient=True)
    else:
        defaults = EstimatorSettings(n_jobs=-1, efficient=False)

    # Cross-validated bandwidth only when no variable is typed 'c'.
    if 'c' not in variable_types.values():
        bw = 'cv_ml'
    else:
        bw = 'normal_reference'

    indep_type = get_type_string(covariates, variable_types)
    dep_type = get_type_string([treatment], variable_types)

    model = KDEMultivariateConditional(
        endog=data[treatment],
        exog=data[covariates],
        dep_type=''.join(dep_type),
        indep_type=''.join(indep_type),
        bw=bw,
        defaults=defaults,
    )
    scores = model.pdf(endog_predict=data[treatment], exog_predict=data[covariates])
    return scores
def _compute_joint_kde(self, *nodes, normref=True):
    """Fit a joint KDE over ``nodes`` and store it in ``self.kdes_joint``.

    Args:
        *nodes: Keys into ``self.node_data.info``; each entry's ``'data'``
            value is one variable of the joint density. The tuple of nodes
            is also the key under which the fitted KDE is stored.
        normref: When true the ``'normal_reference'`` bandwidth is used;
            otherwise ``'cv_ml'`` with efficient estimator settings.
    """
    samples = [self.node_data.info[name]['data'] for name in nodes]
    started = time.time()

    # Every variable is declared continuous ('c').
    settings = {'data': samples, 'var_type': 'c' * len(nodes)}
    if normref:
        settings['bw'] = 'normal_reference'
    else:
        settings['bw'] = 'cv_ml'
        settings['defaults'] = EstimatorSettings(efficient=True)
    fitted = KDEMultivariate(**settings)

    print("Fit joint KDE for %s in %s seconds" % (nodes, time.time() - started))
    self.kdes_joint[nodes] = fitted
def _compute_conditional_kde(self, dep, inds, normref=True):
    """Fit a KDE of ``dep`` conditional on ``inds`` and cache the result.

    Args:
        dep: Key into ``self.node_data.info`` for the dependent variable.
        inds: Iterable of keys for the conditioning variables; also used as
            the inner key of ``self.kdes_conditional[dep]``.
        normref: When true the ``'normal_reference'`` bandwidth is used;
            otherwise ``'cv_ml'`` with efficient estimator settings.
    """
    target = self.node_data.info[dep]['data']
    conditioning = [self.node_data.info[name]['data'] for name in inds]
    started = time.time()

    # Every variable is declared continuous ('c').
    settings = {
        'endog': target,
        'exog': conditioning,
        'dep_type': 'c',
        'indep_type': 'c' * len(conditioning),
    }
    if normref:
        settings['bw'] = 'normal_reference'
    else:
        settings['bw'] = 'cv_ml'
        settings['defaults'] = EstimatorSettings(efficient=True)
    fitted = KDEMultivariateConditional(**settings)

    print("Fit conditional KDE for %s wrt %s in %s seconds" % (dep, inds, time.time() - started))
    self.kdes_conditional[dep][inds] = fitted
def __init__(
    self,
    X,
    causes,
    effects,
    admissable_set=None,
    variable_types=None,
    expectation=False,
    density=True,
):
    """
    We want to calculate the causal effect of X and Y through
    back-door adjustment, P(Y|do(X)) = Sum( P(Y|X,Z)P(Z), Z)
    for some admissable set of control variables, Z.  First we
    calculate the conditional density P(Y|X,Z), then the density
    P(Z).  We find the support of Z so we can properly sum over
    it later.  variable_types are a dictionary with the column name
    pointing to an element of set(['o', 'u', 'c']), for 'ordered',
    'unordered discrete', or 'continuous'.

    Args:
        X: Data indexed by lists of column names.
        causes: List of cause column names.
        effects: List of effect column names.
        admissable_set: Control variables Z; any iterable (list or set).
            Omitted or ``None`` means no control variables.
        variable_types: Mapping of column name to type code. When falsy,
            the types come from ``__infer_variable_types(X)``.
        expectation: When true, a ``KernelReg`` conditional expectation is
            fitted as well.
        density: Accepted for interface compatibility; not read here.
    """
    # Callers may hand over a set (AdjustForDirectCauses.admissable_set
    # returns one). Normalise to a list up front so that list concatenation
    # and column indexing below work, and so no mutable default is shared.
    admissable_set = [] if admissable_set is None else list(admissable_set)
    conditional_density_vars = causes + admissable_set

    self.causes = causes
    self.effects = effects
    self.admissable_set = admissable_set
    self.conditional_density_vars = conditional_density_vars

    # Efficient estimation with four jobs is used for more than 300 rows, or
    # when either variable group combined with Z holds three or more names.
    if (
        len(X) > 300
        or max(len(causes + admissable_set), len(effects + admissable_set)) >= 3
    ):
        self.defaults = EstimatorSettings(n_jobs=4, efficient=True)
    else:
        self.defaults = EstimatorSettings(n_jobs=-1, efficient=False)

    # Previously the inferred types were stored on self only; the per-group
    # type lists were never built and ``variable_types.values()`` was called
    # on the falsy argument, so the inference path always failed.
    # NOTE(review): assumes __infer_variable_types returns a mapping shaped
    # like ``variable_types`` -- confirm.
    if not variable_types:
        variable_types = self.__infer_variable_types(X)
    self.variable_types = variable_types

    dep_type = [variable_types[var] for var in effects]
    indep_type = [variable_types[var] for var in conditional_density_vars]
    density_types = [variable_types[var] for var in admissable_set]

    # Cross-validated bandwidth only when no variable is typed "c".
    if "c" not in variable_types.values():
        bw = "cv_ml"
    else:
        bw = "normal_reference"

    # P(Z) is only estimated when there are control variables.
    if admissable_set:
        self.density = KDEMultivariate(
            X[admissable_set],
            var_type="".join(density_types),
            bw=bw,
            defaults=self.defaults,
        )

    self.conditional_density = KDEMultivariateConditional(
        endog=X[effects],
        exog=X[conditional_density_vars],
        dep_type="".join(dep_type),
        indep_type="".join(indep_type),
        bw=bw,
        defaults=self.defaults,
    )

    if expectation:
        self.conditional_expectation = KernelReg(
            X[effects].values,
            X[conditional_density_vars].values,
            "".join(indep_type),
            bw="cv_ls",
        )

    self.support = self.__get_support(X)

    # Split the typed variables into discrete and continuous, and record
    # which of each kind belong to Z.
    self.discrete_variables = [
        variable
        for variable, var_type in self.variable_types.items()
        if var_type in ["o", "u"]
    ]
    self.discrete_Z = list(
        set(self.discrete_variables).intersection(set(admissable_set))
    )
    self.continuous_variables = [
        variable
        for variable, var_type in self.variable_types.items()
        if var_type == "c"
    ]
    self.continuous_Z = list(
        set(self.continuous_variables).intersection(set(admissable_set))
    )