def test_empty_hinds(self):
    """Smoke-test ``CausalAnalysis`` when every heterogeneity list is empty.

    For each combination of heterogeneity model ('linear', 'forest') and
    ``classification`` flag, a fresh random data set is generated, the
    analysis is fitted with ``heterogeneity_inds=[[], [], []]`` (one empty
    list per analysed feature), and the global/local effect and policy
    entry points are invoked.

    The test makes no assertions on the returned values: it only checks
    that none of the calls raises.
    """
    for h_model in ['linear', 'forest']:
        for classification in [True, False]:
            # subTest labels a failure with the parameter combination that
            # triggered it instead of reporting one opaque failure.
            with self.subTest(heterogeneity_model=h_model,
                              classification=classification):
                # Five continuous columns, one binary column and one
                # three-level column: seven features in total.
                X1 = np.random.normal(0, 1, size=(500, 5))
                X2 = np.random.choice([0, 1], size=(500, 1))
                X3 = np.random.choice([0, 1, 2], size=(500, 1))
                X = np.hstack((X1, X2, X3))
                # NOTE(review): the column names carry a trailing space.
                # Features are addressed by integer position below, so this
                # looks harmless -- confirm it is intentional.
                X_df = pd.DataFrame(X, columns=[f"x{i} " for i in range(7)])
                # Binary outcome, used for both classification settings.
                y = np.random.choice([0, 1], size=(500, ))

                # model
                hetero_inds = [[], [], []]
                feat_inds = [1, 3, 5]
                categorical = [5, 6]
                ca = CausalAnalysis(feat_inds, categorical,
                                    heterogeneity_inds=hetero_inds,
                                    classification=classification,
                                    nuisance_models='linear',
                                    heterogeneity_model=h_model,
                                    n_jobs=-1)
                ca.fit(X_df, y)

                # Results are intentionally discarded; only successful
                # completion is being checked.
                ca.global_causal_effect(alpha=0.05)
                ca.local_causal_effect(X_df, alpha=0.05)
                for ind in feat_inds:
                    ca._policy_tree_output(X_df, ind)
                    ca._individualized_policy_dict(X_df, ind)
def test_individualized_policy(self):
    """Check the shape of ``CausalAnalysis.individualized_policy`` output.

    The analysis is fitted on a four-column frame (two continuous columns
    'a' and 'b', a binary column 'c' and a string-valued column 'd') for
    every combination of outcome container (``pd.Series`` and an ``(n, 1)``
    array) and ``classification`` flag.  For each fitted model the returned
    frame is expected to contain:

    * all 500 rows by default, or ``n_rows`` rows when that is given;
    * the original feature columns plus four extra ones (policy, effect,
      upper and lower bounds).

    Scalar and per-sample/per-treatment ``treatment_costs`` are both
    exercised, and ``_individualized_policy_dict`` is called to confirm it
    does not raise.
    """
    y_arr = np.random.choice([0, 1], size=(500, ))
    X = pd.DataFrame({
        'a': np.random.normal(size=500),
        'b': np.random.normal(size=500),
        'c': np.random.choice([0, 1], size=500),
        'd': np.random.choice(['a', 'b', 'c'], size=500)
    })
    inds = ['a', 'b', 'c', 'd']
    cats = ['c', 'd']
    hinds = ['a', 'd']

    def assert_policy_shape(policy_df, expected_rows):
        """Assert the row count and the four added output columns."""
        self.assertEqual(policy_df.shape[0], expected_rows)
        # new cols for policy, effect, upper and lower bounds
        self.assertEqual(policy_df.shape[1], 4 + X.shape[1])

    for y in [pd.Series(y_arr), y_arr.reshape(-1, 1)]:
        for classification in [True, False]:
            # subTest labels a failure with the outcome container and the
            # classification flag that triggered it.
            with self.subTest(y_type=type(y).__name__,
                              classification=classification):
                ca = CausalAnalysis(inds, cats, hinds,
                                    heterogeneity_model='linear',
                                    classification=classification)
                ca.fit(X, y)

                # all rows included by default
                df = ca.individualized_policy(X, 'a')
                assert_policy_shape(df, 500)

                df = ca.individualized_policy(X, 'b', n_rows=5)
                assert_policy_shape(df, 5)

                # verify that we can use a scalar treatment cost
                df = ca.individualized_policy(X, 'c', treatment_costs=100)
                assert_policy_shape(df, 500)

                # verify that we can specify per-treatment costs for each
                # sample
                df = ca.individualized_policy(
                    X, 'd', alpha=0.05,
                    treatment_costs=np.random.normal(size=(500, 2)))
                assert_policy_shape(df, 500)

                # Result intentionally discarded; only checks that the call
                # completes without raising.
                ca._individualized_policy_dict(X, 'a')