Пример #1
0
    def test_PE_F(self):
        """Run DAME once per ``adaptive_weights`` learner and verify each run.

        For every learner name a model is fit on freshly generated holdout
        data and then used on a freshly generated dataset.  A run counts as
        failed when ``check_statistics(model)`` is truthy, or when fitting or
        predicting raises ``KeyError`` / ``ValueError``.
        """
        # NOTE: ``False`` (fixed weights) is currently left out of this list;
        # the branch that handles it is kept so it can simply be re-added.
        for adaptive_weights in ['ridge', 'decisiontree', 'ridgeCV', 'decisiontreeCV']:
            is_correct = 1
            try:
                if adaptive_weights is False:
                    df, _ = generate_uniform_given_importance(
                        num_control=100, num_treated=100,
                        num_cov=7, min_val=0,
                        max_val=3, covar_importance=[4, 3, 2, 1, 0, 0, 0])
                    holdout, _ = generate_uniform_given_importance(
                        num_control=100, num_treated=100,
                        num_cov=7, min_val=0,
                        max_val=3, covar_importance=[4, 3, 2, 1, 0, 0, 0])
                    # Fixed weights: covariate importances normalised to sum to 1.
                    covar_importance = np.array([4, 3, 2, 1, 0, 0, 0])
                    weight_array = covar_importance / covar_importance.sum()
                    model = matching.DAME(repeats=False, verbose=0,
                                          adaptive_weights=adaptive_weights)
                    model.fit(holdout_data=holdout, weight_array=list(weight_array))
                    model.predict(df)
                else:
                    df, _ = generate_uniform_given_importance()
                    holdout, _ = generate_uniform_given_importance()
                    model = matching.DAME(repeats=False, verbose=0,
                                          adaptive_weights=adaptive_weights)
                    model.fit(holdout_data=holdout)
                    model.predict(df)

                # No ``break`` here: leaving the loop would skip the assertion
                # below and let a failed check pass silently.
                if check_statistics(model):
                    is_correct = 0

            except (KeyError, ValueError):
                is_correct = 0

            self.assertEqual(1, is_correct,
                             msg='DAME-Error when we use PE method: {0} '.format(adaptive_weights))
Пример #2
0
    def test_verbose_F(self):
        """Run DAME at every verbosity level (0-3) and verify the result.

        For each level two models are exercised: one fit on a separate
        holdout dataframe, and one fit with ``holdout_data=0.5``.  Only the
        second model is passed to ``check_statistics``; a truthy result, or a
        ``KeyError`` / ``ValueError`` from either model, fails that level.
        """
        for verbose in [0, 1, 2, 3]:
            is_correct = 1
            try:
                df, _ = generate_uniform_given_importance(
                    num_control=1000, num_treated=1000,
                    num_cov=7, min_val=0,
                    max_val=3, covar_importance=[4, 3, 2, 1, 0, 0, 0])
                holdout, _ = generate_uniform_given_importance(
                    num_control=100, num_treated=100,
                    num_cov=7, min_val=0,
                    max_val=3, covar_importance=[4, 3, 2, 1, 0, 0, 0])

                model = matching.DAME(missing_data_replace=2, want_bf=True, verbose=verbose)
                model.fit(holdout_data=holdout)
                model.predict(df)

                # Second model leaves ``repeats`` at its default
                # (the original author's note says that is True — TODO confirm).
                model = matching.DAME(verbose=verbose)
                model.fit(holdout_data=0.5)
                model.predict(df)

                # No ``break`` here: leaving the loop would skip the assertion
                # below and let a failed check pass silently.
                if check_statistics(model):
                    is_correct = 0
            except (KeyError, ValueError):
                is_correct = 0

            self.assertEqual(1, is_correct, msg='DAME-Error when verbose ={0}'.format(verbose))
Пример #3
0
    def test_miss_data_F(self):
        """Exercise every missing-data strategy pair on data containing NaNs.

        For each combination of ``missing_holdout_replace`` and
        ``missing_data_replace`` in {0, 1, 2}, NaNs are written into the
        first and the middle column of the first tenth of the rows, the same
        frame is used as holdout and as input, and the fitted model is passed
        to ``check_statistics``.  Each combination is asserted on its own so
        the failure message names the pair that actually failed.
        """
        for missing_holdout_replace in [0, 1, 2]:
            for missing_data_replace in [0, 1, 2]:
                is_correct = 1
                try:
                    df, _ = generate_uniform_given_importance(num_control=1000,
                                                              num_treated=1000)
                    # Inject missing values.
                    m, n = df.shape
                    for i in range(int(m / 10)):
                        for j in [0, int(n / 2)]:
                            df.iloc[i, j] = np.nan
                    holdout = df.copy()

                    model = matching.DAME(repeats=False,
                                          missing_holdout_replace=missing_holdout_replace,
                                          missing_data_replace=missing_data_replace)
                    model.fit(holdout_data=holdout)
                    model.predict(df)
                    if check_statistics(model):
                        is_correct = 0
                except (KeyError, ValueError):
                    is_correct = 0

                self.assertEqual(
                    1, is_correct,
                    msg='DAME-Error when do missing data handling with '
                        'missing_holdout_replace = {0}, missing_data_replace = {1}'.format(
                            missing_holdout_replace, missing_data_replace))
Пример #4
0
    def test_datasets_F(self):
        """Run DAME on each data source and verify the result.

        The sources are three generator functions plus the path to
        ``basicTestData.csv`` located next to this file.  Generators are
        called twice (input data and holdout data); the path is passed
        straight through as both.  A source fails when
        ``check_statistics(model)`` is truthy or a ``KeyError`` /
        ``ValueError`` is raised.
        """
        df_path = os.path.join(os.path.dirname(__file__), 'basicTestData.csv')
        sources = [generate_uniform_given_importance,
                   generate_binomial_given_importance,
                   generate_binomial_decay_importance,
                   df_path]
        for gen in sources:
            is_correct = 1
            try:
                if isinstance(gen, str):
                    # A file path is handed to the model unchanged.
                    df = gen
                    holdout = gen
                else:
                    df, _ = gen()
                    holdout, _ = gen()
                model = matching.DAME(repeats=False)
                model.fit(holdout_data=holdout)
                model.predict(df)

                # No ``break`` here: leaving the loop would skip the assertion
                # below and let a failed check pass silently.
                if check_statistics(model):
                    is_correct = 0

            except (KeyError, ValueError):
                is_correct = 0

            self.assertEqual(1, is_correct,
                             msg='DAME-Error when we use the dataset generated by {0} '.format(str(gen)))
    def test_miss_data_indicator_F(self):
        """Check DAME with a custom missing-value marker (``'a'``).

        The marker is written into the first and the middle column of the
        first hundredth of the rows; the same frame serves as holdout and as
        input.  The test fails when ``check_statistics(model)`` is truthy or
        a ``KeyError`` / ``ValueError`` is raised.
        """
        marker = 'a'
        outcome = 1
        try:
            data, _ = generate_uniform_given_importance(num_control=1000,
                                                        num_treated=1000)
            # Plant the marker in the chosen cells.
            row_count, col_count = data.shape
            marked_columns = [0, col_count // 2]
            for row in range(row_count // 100):
                for col in marked_columns:
                    data.iloc[row, col] = marker
            holdout = data.copy()

            model = matching.DAME(missing_indicator=marker,
                                  missing_holdout_replace=1,
                                  missing_data_replace=1)
            model.fit(holdout_data=holdout)
            model.predict(data)
            if check_statistics(model):
                outcome = 0
        except (KeyError, ValueError):
            outcome = 0

        self.assertEqual(1, outcome, msg='DAME-Error when missing_indicator')
    def test_repeats_F(self):
        """Check DAME with ``repeats=True`` on small generated datasets.

        Input and holdout data are generated independently with the same
        settings.  The test fails when ``check_statistics(model)`` is truthy
        or a ``KeyError`` / ``ValueError`` is raised.
        """
        def make_sample():
            # Fresh draw (and a fresh importance list) on every call.
            sample, _ = generate_uniform_given_importance(
                num_control=100, num_treated=100,
                num_cov=7, min_val=0, max_val=3,
                covar_importance=[4, 3, 2, 1, 0, 0, 0])
            return sample

        df = make_sample()
        holdout = make_sample()

        outcome = 1
        try:
            model = matching.DAME(repeats=True)
            model.fit(holdout_data=holdout)
            model.predict(df)
            if check_statistics(model):
                outcome = 0
        except (KeyError, ValueError):
            outcome = 0

        self.assertEqual(1, outcome, msg='DAME-Error when repeat = True')
Пример #7
0
    def test_other_param_F(self):
        """Check DAME under several early-stopping settings.

        One model per setting is fit on the same holdout data and used on the
        same input data.  All settings are always run; the test fails if any
        model makes ``check_statistics`` return a truthy value, or if a
        ``KeyError`` / ``ValueError`` is raised anywhere along the way.
        """
        # ``stop_unmatched_c=True`` and ``stop_unmatched_t=True`` were
        # disabled in the original test and remain excluded here.
        early_stop_settings = [
            {'early_stop_pe': 1},
            {'early_stop_un_c_frac': 0.5},
            {'early_stop_un_t_frac': 0.5},
            {'early_stop_iterations': 2},
        ]

        outcome = 1
        try:
            df, _ = generate_uniform_given_importance(
                num_control=1000, num_treated=1000,
                num_cov=7, min_val=0, max_val=3,
                covar_importance=[4, 3, 2, 1, 0, 0, 0])
            holdout, _ = generate_uniform_given_importance(
                num_control=100, num_treated=100,
                num_cov=7, min_val=0, max_val=3,
                covar_importance=[4, 3, 2, 1, 0, 0, 0])

            for setting in early_stop_settings:
                model = matching.DAME(verbose=0, **setting)
                model.fit(holdout_data=holdout)
                model.predict(df)
                if check_statistics(model):
                    outcome = 0
        except (KeyError, ValueError):
            outcome = 0

        self.assertEqual(1, outcome, msg='DAME-Error when other parameters')
Пример #8
0
    def test_no_matching_F(self):
        """Check that DAME copes with a tiny dataset of all-distinct units.

        Every row of the hand-built frame has different covariate values, and
        the frame is used as both holdout and input.  The test only requires
        that fitting and predicting raise neither ``KeyError`` nor
        ``ValueError``; ``check_statistics`` is not consulted.
        """
        # The original generated a 6000-row dataset here and immediately
        # overwrote it; that unused work has been removed.
        df = pd.DataFrame(
            [[1, 2, 0, 1.0], [3, 4, 1, 2.0], [5, 6, 0, 5.0], [7, 8, 1, 8.0], [9, 10, 1, 10.0]],
            columns=['cov1', 'cov2', 'treated', 'outcome'])
        holdout = df.copy()

        is_correct = 1
        try:
            model = matching.DAME(repeats=True)
            model.fit(holdout_data=holdout)
            model.predict(df)
        except (KeyError, ValueError):
            is_correct = 0

        self.assertEqual(1, is_correct, msg='DAME-Error when no matching')
Пример #9
0
    def test_data_split_F(self):
        """Check DAME when the holdout set is given as a fraction.

        The fractions 0.3, 0.5 and 0.7 are tried in order on one generated
        dataset.  Iteration stops at the first fraction for which
        ``check_statistics(model)`` is truthy; a ``KeyError`` /
        ``ValueError`` also fails the test.  The failure message reports the
        fraction that was being processed.
        """
        df, _ = generate_uniform_given_importance(num_control=3000, num_treated=3000)

        outcome = 1
        try:
            for holdout in (0.3, 0.5, 0.7):
                model = matching.DAME(repeats=True)
                model.fit(holdout_data=holdout)
                model.predict(df)
                if check_statistics(model):
                    outcome = 0
                    break
        except (KeyError, ValueError):
            outcome = 0

        failure_note = 'DAME-Error when holdout = {0}'.format(holdout)
        self.assertEqual(1, outcome, msg=failure_note)
Пример #10
0
    def test_has_unmatched_units_F(self):
        """Check DAME on a tiny hand-built dataset with a single treated unit.

        The frame has five control rows and one treated row and is used as
        both holdout and input.  The test fails when
        ``check_statistics(model)`` is truthy or a ``KeyError`` /
        ``ValueError`` is raised.
        """
        is_correct = 1
        try:
            df = pd.DataFrame(
                [[1, 2, 0, 1.0], [3, 4, 0, 2.0], [5, 6, 0, 5.0],
                 [7, 8, 0, 8.0], [9, 10, 1, 10.0], [9, 20, 0, 10.0]],
                columns=['cov1', 'cov2', 'treated', 'outcome'])
            holdout = df.copy()
            model = matching.DAME(repeats=True)
            model.fit(holdout_data=holdout)
            model.predict(df)

            if check_statistics(model):
                is_correct = 0
        except (KeyError, ValueError):
            is_correct = 0

        # The original message said "when no matching", copied from another
        # test; it now names this scenario.
        self.assertEqual(1, is_correct, msg='DAME-Error when has unmatched units')
Пример #11
0
    def test_want_pebf_F(self):
        """Check every ``want_pe`` / ``want_bf`` combination.

        A combination fails when ``check_statistics(model)`` is truthy, when
        ``want_pe`` is set but ``model.pe_each_iter`` is empty, when
        ``want_bf`` is set but ``model.bf_each_iter`` is empty, or when a
        ``KeyError`` / ``ValueError`` is raised.  Each combination is
        asserted on its own so the failure message names the pair that
        actually failed.
        """
        df, _ = generate_uniform_given_importance(
            num_control=3000, num_treated=3000,
            num_cov=6, min_val=0,
            max_val=3, covar_importance=[4, 3, 2, 1, 0, 0])
        holdout, _ = generate_uniform_given_importance(
            num_control=100, num_treated=100,
            num_cov=6, min_val=0,
            max_val=3, covar_importance=[4, 3, 2, 1, 0, 0])

        for want_pe in [False, True]:
            for want_bf in [False, True]:
                is_correct = 1
                try:
                    model = matching.DAME(want_pe=want_pe, want_bf=want_bf)
                    model.fit(holdout_data=holdout)
                    model.predict(df)
                    missing_pe = want_pe and len(model.pe_each_iter) == 0
                    missing_bf = want_bf and len(model.bf_each_iter) == 0
                    if check_statistics(model) or missing_pe or missing_bf:
                        is_correct = 0
                except (KeyError, ValueError):
                    is_correct = 0

                self.assertEqual(
                    1, is_correct,
                    msg='DAME Error when want_pe = {0} want_bf = {1}'.format(str(want_pe), str(want_bf)))