def test_PE_F(self):
    """Run DAME once per ``adaptive_weights`` option and assert every run.

    For each option a dataset and a holdout set are generated, a DAME model
    is fitted on the holdout and used to predict on the dataset.  A run
    counts as failed when ``check_statistics(model)`` returns a truthy value
    or when a ``KeyError`` or ``ValueError`` is raised.

    The assertion is made once per option, so a failure for any single
    option is always reported together with the option that caused it.
    """
    # ``False`` is currently not part of the list; the branch handling it is
    # kept so that the option can be re-enabled without further changes.
    for adaptive_weights in ['ridge', 'decisiontree', 'ridgeCV', 'decisiontreeCV']:
        is_correct = 1
        try:
            if adaptive_weights is False:
                df, _ = generate_uniform_given_importance(
                    num_control=100, num_treated=100, num_cov=7,
                    min_val=0, max_val=3,
                    covar_importance=[4, 3, 2, 1, 0, 0, 0])
                holdout, _ = generate_uniform_given_importance(
                    num_control=100, num_treated=100, num_cov=7,
                    min_val=0, max_val=3,
                    covar_importance=[4, 3, 2, 1, 0, 0, 0])
                # Normalise the importances so that they sum to one before
                # handing them to ``fit`` as explicit weights.
                covar_importance = np.array([4, 3, 2, 1, 0, 0, 0])
                weight_array = covar_importance / covar_importance.sum()
                model = matching.DAME(repeats=False, verbose=0,
                                      adaptive_weights=adaptive_weights)
                model.fit(holdout_data=holdout,
                          weight_array=list(weight_array))
                model.predict(df)
            else:
                df, _ = generate_uniform_given_importance()
                holdout, _ = generate_uniform_given_importance()
                model = matching.DAME(repeats=False, verbose=0,
                                      adaptive_weights=adaptive_weights)
                model.fit(holdout_data=holdout)
                model.predict(df)
            # No ``break`` here: leaving the loop early would skip the
            # assertion below and hide the failure.
            if check_statistics(model):
                is_correct = 0
        except (KeyError, ValueError):
            is_correct = 0
        self.assertEqual(
            1, is_correct,
            msg='DAME-Error when we use PE method: {0} '.format(adaptive_weights))
def test_verbose_F(self):
    """Run DAME at every ``verbose`` level and assert each level separately.

    For each level two models are exercised on freshly generated data:

    * one built with ``missing_data_replace=2`` and ``want_bf=True`` and
      fitted on an explicit holdout set;
    * one built with only ``verbose`` set and fitted with
      ``holdout_data=0.5``.

    Only the second model is passed to ``check_statistics``; a truthy result
    or a ``KeyError``/``ValueError`` from either model marks the level as
    failed.
    """
    for verbose in [0, 1, 2, 3]:
        is_correct = 1
        try:
            df, _ = generate_uniform_given_importance(
                num_control=1000, num_treated=1000, num_cov=7,
                min_val=0, max_val=3,
                covar_importance=[4, 3, 2, 1, 0, 0, 0])
            holdout, _ = generate_uniform_given_importance(
                num_control=100, num_treated=100, num_cov=7,
                min_val=0, max_val=3,
                covar_importance=[4, 3, 2, 1, 0, 0, 0])

            model = matching.DAME(missing_data_replace=2, want_bf=True,
                                  verbose=verbose)
            model.fit(holdout_data=holdout)
            model.predict(df)

            # Second model leaves every other option at its default
            # (the original note here read "repeats = True").
            model = matching.DAME(verbose=verbose)
            model.fit(holdout_data=0.5)
            model.predict(df)

            # No ``break`` here: leaving the loop early would skip the
            # assertion below and hide the failure.
            if check_statistics(model):
                is_correct = 0
        except (KeyError, ValueError):
            is_correct = 0
        self.assertEqual(
            1, is_correct,
            msg='DAME-Error when verbose ={0}'.format(verbose))
def test_miss_data_F(self):
    """Exercise every pair of missing-data replacement modes (0, 1, 2).

    For each ``(missing_holdout_replace, missing_data_replace)`` pair a
    dataset is generated, NaNs are written into the first and the middle
    column of the first tenth of the rows, and a copy of that frame is used
    as the holdout set.  The pair fails when ``check_statistics(model)``
    returns a truthy value or a ``KeyError``/``ValueError`` is raised.

    The assertion is made per pair so the failure message always names the
    combination that actually failed.
    """
    for missing_holdout_replace in [0, 1, 2]:
        for missing_data_replace in [0, 1, 2]:
            is_correct = 1
            try:
                df, _ = generate_uniform_given_importance(
                    num_control=1000, num_treated=1000)
                # Inject missing values: first 10% of rows, columns 0 and n/2.
                m, n = df.shape
                for i in range(int(m / 10)):
                    for j in [0, int(n / 2)]:
                        df.iloc[i, j] = np.nan
                holdout = df.copy()
                model = matching.DAME(
                    repeats=False,
                    missing_holdout_replace=missing_holdout_replace,
                    missing_data_replace=missing_data_replace)
                model.fit(holdout_data=holdout)
                model.predict(df)
                if check_statistics(model):
                    is_correct = 0
            except (KeyError, ValueError):
                is_correct = 0
            self.assertEqual(
                1, is_correct,
                msg='DAME-Error when do missing data handling with '
                    'missing_holdout_replace = {0}, missing_data_replace = {1}'
                    .format(missing_holdout_replace, missing_data_replace))
def test_datasets_F(self):
    """Run DAME on several data sources and assert each source separately.

    The sources are three generator functions plus the path to
    ``basicTestData.csv`` located next to this file.  A generator is called
    twice (once for the dataset, once for the holdout); the path string is
    passed unchanged as both dataset and holdout.  A source fails when
    ``check_statistics(model)`` returns a truthy value or a
    ``KeyError``/``ValueError`` is raised.
    """
    df_path = os.path.join(os.path.dirname(__file__), 'basicTestData.csv')
    sources = [
        generate_uniform_given_importance,
        generate_binomial_given_importance,
        generate_binomial_decay_importance,
        df_path,
    ]
    for gen in sources:
        is_correct = 1
        try:
            if isinstance(gen, str):
                # A string is a file path and is handed to DAME as is.
                df = gen
                holdout = gen
            else:
                df, _ = gen()
                holdout, _ = gen()
            model = matching.DAME(repeats=False)
            model.fit(holdout_data=holdout)
            model.predict(df)
            # No ``break`` here: leaving the loop early would skip the
            # assertion below and hide the failure.
            if check_statistics(model):
                is_correct = 0
        except (KeyError, ValueError):
            is_correct = 0
        self.assertEqual(
            1, is_correct,
            msg='DAME-Error when we use the dataset generated by {0} '.format(str(gen)))
def test_miss_data_indicator_F(self):
    """Check DAME with a custom ``missing_indicator`` value of ``'a'``.

    The string ``'a'`` is written into the first and the middle column of
    the first hundredth of the rows of a generated dataset; a copy of the
    frame serves as the holdout set.  The test fails when
    ``check_statistics(model)`` returns a truthy value or a
    ``KeyError``/``ValueError`` is raised.
    """
    indicator_ok = True
    try:
        df, true_TE = generate_uniform_given_importance(
            num_control=1000, num_treated=1000)

        # Mark cells as missing using the custom indicator.
        n_rows, n_cols = df.shape
        marked_cols = [0, int(n_cols / 2)]
        for row in range(int(n_rows / 100)):
            for col in marked_cols:
                df.iloc[row, col] = 'a'

        holdout = df.copy()
        model = matching.DAME(missing_indicator='a',
                              missing_holdout_replace=1,
                              missing_data_replace=1)
        model.fit(holdout_data=holdout)
        model.predict(df)
        if check_statistics(model):
            indicator_ok = False
    except (KeyError, ValueError):
        indicator_ok = False

    is_correct = 1 if indicator_ok else 0
    self.assertEqual(1, is_correct,
                     msg='DAME-Error when missing_indicator')
def test_repeats_F(self):
    """Check DAME with ``repeats=True`` on generated data.

    The dataset and the holdout set are generated independently with the
    same settings.  The test fails when ``check_statistics(model)`` returns
    a truthy value or a ``KeyError``/``ValueError`` is raised while fitting
    or predicting.
    """
    def make_data():
        # A fresh argument list is built for every call.
        return generate_uniform_given_importance(
            num_control=100, num_treated=100, num_cov=7,
            min_val=0, max_val=3,
            covar_importance=[4, 3, 2, 1, 0, 0, 0])

    df, true_TE = make_data()
    holdout, true_TE = make_data()

    repeats_ok = True
    try:
        model = matching.DAME(repeats=True)
        model.fit(holdout_data=holdout)
        model.predict(df)
        if check_statistics(model):
            repeats_ok = False
    except (KeyError, ValueError):
        repeats_ok = False

    is_correct = 1 if repeats_ok else 0
    self.assertEqual(1, is_correct, msg='DAME-Error when repeat = True')
def test_other_param_F(self):
    """Check DAME with each early-stopping option, one option at a time.

    One dataset and one holdout set are generated and reused for every
    variant.  All variants are run even if an earlier one is flagged; the
    test fails when ``check_statistics(model)`` returns a truthy value for
    any variant or a ``KeyError``/``ValueError`` is raised.
    """
    # NOTE: variants with ``stop_unmatched_c=True`` and
    # ``stop_unmatched_t=True`` were commented out in this test and remain
    # disabled.
    variants = (
        {'early_stop_pe': 1},
        {'early_stop_un_c_frac': 0.5},
        {'early_stop_un_t_frac': 0.5},
        {'early_stop_iterations': 2},
    )

    is_correct = 1
    try:
        df, true_TE = generate_uniform_given_importance(
            num_control=1000, num_treated=1000, num_cov=7,
            min_val=0, max_val=3,
            covar_importance=[4, 3, 2, 1, 0, 0, 0])
        holdout, true_TE = generate_uniform_given_importance(
            num_control=100, num_treated=100, num_cov=7,
            min_val=0, max_val=3,
            covar_importance=[4, 3, 2, 1, 0, 0, 0])

        for options in variants:
            model = matching.DAME(verbose=0, **options)
            model.fit(holdout_data=holdout)
            model.predict(df)
            if check_statistics(model):
                is_correct = 0
    except (KeyError, ValueError):
        is_correct = 0

    self.assertEqual(1, is_correct, msg='DAME-Error when other parameters')
def test_no_matching_F(self):
    """Check DAME runs on a tiny dataset where no two rows share a covariate value.

    The five-row frame is used both as the dataset and (as a copy) as the
    holdout set.  The test only verifies that fitting and predicting do not
    raise ``KeyError`` or ``ValueError``; model statistics are not checked.
    """
    is_correct = 1
    try:
        # Every value of cov1 and of cov2 is unique across the rows.
        df = pd.DataFrame(
            [[1, 2, 0, 1.0],
             [3, 4, 1, 2.0],
             [5, 6, 0, 5.0],
             [7, 8, 1, 8.0],
             [9, 10, 1, 10.0]],
            columns=['cov1', 'cov2', 'treated', 'outcome'])
        holdout = df.copy()
        model = matching.DAME(repeats=True)
        model.fit(holdout_data=holdout)
        model.predict(df)
    except (KeyError, ValueError):
        is_correct = 0
    self.assertEqual(1, is_correct, msg='DAME-Error when no matching')
def test_data_split_F(self):
    """Check DAME when ``holdout_data`` is given as a fraction.

    A single generated dataset is reused for the fractions 0.3, 0.5 and
    0.7.  The loop stops at the first fraction for which
    ``check_statistics(model)`` returns a truthy value; that, or a
    ``KeyError``/``ValueError``, fails the test, and the message reports the
    fraction being processed at that point.
    """
    df, true_TE = generate_uniform_given_importance(num_control=3000,
                                                    num_treated=3000)
    split_ok = True
    try:
        for holdout in [0.3, 0.5, 0.7]:
            model = matching.DAME(repeats=True)
            model.fit(holdout_data=holdout)
            model.predict(df)
            if check_statistics(model):
                split_ok = False
                break
    except (KeyError, ValueError):
        split_ok = False

    is_correct = 1 if split_ok else 0
    self.assertEqual(1, is_correct,
                     msg='DAME-Error when holdout = {0}'.format(holdout))
def test_has_unmatched_units_F(self):
    """Check DAME on a tiny dataset with one treated unit among six rows.

    Only the last two rows share a covariate value (``cov1 == 9``); the
    other rows have unique values in both covariates.  The frame is used as
    the dataset and (as a copy) as the holdout set.  The test fails when
    ``check_statistics(model)`` returns a truthy value or a
    ``KeyError``/``ValueError`` is raised.
    """
    is_correct = 1
    try:
        df = pd.DataFrame(
            [[1, 2, 0, 1.0],
             [3, 4, 0, 2.0],
             [5, 6, 0, 5.0],
             [7, 8, 0, 8.0],
             [9, 10, 1, 10.0],
             [9, 20, 0, 10.0]],
            columns=['cov1', 'cov2', 'treated', 'outcome'])
        holdout = df.copy()
        model = matching.DAME(repeats=True)
        model.fit(holdout_data=holdout)
        model.predict(df)
        if check_statistics(model):
            is_correct = 0
    except (KeyError, ValueError):
        is_correct = 0
    # The message names this scenario rather than repeating the text used by
    # the no-matching test.
    self.assertEqual(1, is_correct,
                     msg='DAME-Error when has unmatched units')
def test_want_pebf_F(self):
    """Check every combination of the ``want_pe`` and ``want_bf`` flags.

    One dataset and one holdout set are generated and reused for all four
    combinations.  A combination fails when ``check_statistics(model)``
    returns a truthy value, when a requested ``pe_each_iter`` or
    ``bf_each_iter`` is empty after prediction, or when a
    ``KeyError``/``ValueError`` is raised.

    The assertion is made per combination so the failure message always
    names the flags that actually failed.
    """
    df, _ = generate_uniform_given_importance(
        num_control=3000, num_treated=3000, num_cov=6,
        min_val=0, max_val=3, covar_importance=[4, 3, 2, 1, 0, 0])
    holdout, _ = generate_uniform_given_importance(
        num_control=100, num_treated=100, num_cov=6,
        min_val=0, max_val=3, covar_importance=[4, 3, 2, 1, 0, 0])

    for want_pe in [False, True]:
        for want_bf in [False, True]:
            is_correct = 1
            try:
                model = matching.DAME(want_pe=want_pe, want_bf=want_bf)
                model.fit(holdout_data=holdout)
                model.predict(df)
                missing_pe = want_pe and len(model.pe_each_iter) == 0
                missing_bf = want_bf and len(model.bf_each_iter) == 0
                if check_statistics(model) or missing_pe or missing_bf:
                    is_correct = 0
            except (KeyError, ValueError):
                is_correct = 0
            self.assertEqual(
                1, is_correct,
                msg='DAME Error when want_pe = {0} want_bf = {1}'.format(
                    str(want_pe), str(want_bf)))