def testDuplicateGeoInSamePair(self):
   """Expects a ValueError when a geo is listed twice within one pair."""
   frame = self.df.copy()
   # Relabel geo #1 as geo #2, so that geo #2 shows up in pair 1 under both
   # assignment 0 and assignment 1.
   is_geo_one = frame['geo'] == 1
   frame.loc[is_geo_one, 'geo'] = 2
   expected_error = r'Some geos are duplicated and appear in multiple pairs.'
   with self.assertRaisesRegex(ValueError, expected_error):
     trimmed_match_post_analysis.check_input_data(frame)
 def testCheckGeosPerPairInInputData(self):
   """Expects a ValueError when pairs lack one geo for each group."""
   swapped = self.df.copy()
   # Both masks depend only on the 'geo' column, so they can be computed up
   # front: geo #2 (treatment) becomes 0 and geo #3 (control) becomes 1.
   to_control = swapped['geo'] == 2
   to_treatment = swapped['geo'] == 3
   swapped.loc[to_control, 'assignment'] = 0
   swapped.loc[to_treatment, 'assignment'] = 1
   expected_error = r'Some pairs do not have one geo for each group.'
   with self.assertRaisesRegex(ValueError, expected_error):
     trimmed_match_post_analysis.check_input_data(swapped)
 def testDuplicateGeoInInputData(self):
   """Expects a ValueError when geo ids are duplicated in the input data."""
   frame = self.df.copy()
   # Relabel geo #4 as #2, then geo #5 as #3, so that geos #2 and #3 are
   # duplicated. The relabels are applied in this order, one after the other.
   for old_geo, new_geo in ((4, 2), (5, 3)):
     frame.loc[frame['geo'] == old_geo, 'geo'] = new_geo
   expected_error = r'Some geos are duplicated and appear in multiple pairs.'
   with self.assertRaisesRegex(ValueError, expected_error):
     trimmed_match_post_analysis.check_input_data(frame)
 def testCheckInputDataColumns(self):
   """Expects a ValueError when the 'assignment' column is missing."""
   # Work on a copy of the fixture with the 'assignment' column removed.
   without_assignment = self.df.copy().drop(columns='assignment')
   expected_error = (
       'The mandatory columns {\'assignment\'} are missing from the input data'
   )
   with self.assertRaisesRegex(ValueError, expected_error):
     trimmed_match_post_analysis.check_input_data(without_assignment)
 def testCheckInputDataCorrectGroupLabels(self):
   """Expects a ValueError when the requested group labels do not fit."""
   # The call asks for Treatment=1 / Control=2, while the expected message
   # reports [0, 1] as the labels found in the data.
   expected_error = (
       r'The data do not have observations for the two groups.' +
       r'Check the data and the values used to indicate the ' +
       r'assignments for treatment and control. The labels ' +
       r'found in the data in input are ' +
       r'\[0, 1\], and the expected labels ' +
       r'are: Treatment=1, Control=2')
   with self.assertRaisesRegex(ValueError, expected_error):
     trimmed_match_post_analysis.check_input_data(
         self.df, group_control=2, group_treatment=1)
 def testManyGeosPerPairWithSameAssignmentInInputData(self):
   """Expects a ValueError when extra geos are added to existing groups."""
   # Extra rows: geo #10 goes to pair 1 with assignment 0 and geo #9 goes to
   # pair 4 with assignment 1.
   extra_geos = pd.DataFrame({
       'date': ['2020-10-10', '2020-10-10'],
       'geo': [10, 9],
       'response': [10, 11],
       'cost': [1.0, 2.0],
       'pair': [1, 4],
       'assignment': [0, 1],
       'period': [1, 1],
   })
   # DataFrame.append was deprecated in pandas 1.4 and removed in 2.0.
   # pd.concat with its default arguments stacks the rows the same way and
   # works on both old and new pandas versions.
   temp_df = pd.concat([self.df.copy(), extra_geos])
   with self.assertRaisesRegex(
       ValueError, r'Some pairs do not have one geo for each group.'):
     trimmed_match_post_analysis.check_input_data(temp_df)
  def testCheckUnequalGroupSizesInInputData(self):
    """Expects a ValueError when a geo is removed from the input data."""
    expected_error = (r'Some pairs do not have one geo for ' +
                      r'each group.')
    # First remove geo #2 (treatment), then, from a fresh copy, geo #1
    # (control). Each reduced frame must be rejected.
    for dropped_geo in (2, 1):
      frame = self.df.copy()
      frame = frame[frame['geo'] != dropped_geo]
      with self.assertRaisesRegex(ValueError, expected_error):
        trimmed_match_post_analysis.check_input_data(frame)
 def testCheckInputData(self):
   """Checks that a removed observation comes back with zero values."""
   frame = self.df.copy()
   # Remove the observation of geo #2 on 2020-10-10. The mask is the
   # De Morgan form of "not (geo == 2 and date == '2020-10-10')".
   keep_row = (frame['geo'] != 2) | (frame['date'] != '2020-10-10')
   frame = frame[keep_row]
   result = trimmed_match_post_analysis.check_input_data(frame)
   # Expected output: the removed observation has response 0.0 and cost 0.0.
   expected = pd.DataFrame({
       'date': ['2020-10-09', '2020-10-10', '2020-10-11'] * 4,
       'geo': [1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4],
       'pair': [1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2],
       'assignment': [0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1],
       'response': [10, 10, 10, 20, 0.0, 20, 30, 30, 30, 40, 40, 40],
       'cost': [1.0, 1.0, 1.0, 2.0, 0.0, 2.0, 3.0, 3.0, 3.0, 4.0, 4.0, 4.0],
   })
   expected = expected.sort_values(by=['date', 'geo'])
   expected = expected.reset_index(drop=True)
   self.assertTrue(result.equals(expected))
 def testCheckInputDataOrder(self):
   """Checks the row order of the key columns from check_input_data."""
   # Four geos, each observed on a different one of four consecutive dates.
   raw = pd.DataFrame({
       'date': pd.date_range('2021-01-01', periods=4),
       'pair': [2, 2, 1, 1],
       'assignment': [1, 2, 2, 1],
       'geo': [1, 2, 3, 4],
       'response': 0,
       'cost': 0
   })
   result = trimmed_match_post_analysis.check_input_data(
       raw, group_control=1, group_treatment=2)
   # Expected layout: every date repeated four times, with geos 1-4 (and
   # their pair/assignment) cycling within each date.
   expected = pd.DataFrame({
       'date': np.repeat(raw['date'], 4),
       'geo': [1, 2, 3, 4] * 4,
       'pair': [2, 2, 1, 1] * 4,
       'assignment': [1, 2, 2, 1] * 4
   })
   key_columns = ['date', 'geo', 'pair', 'assignment']
   self.assertTrue(np.array_equal(result[key_columns], expected))
 def testGeosNotInExperimentAreExcluded(self):
   """Checks that rows with assignment -1 are left out of the output."""
   # Two additional geos (#9 and #10) with assignment -1, each placed in a
   # new pair of its own (100 and 101).
   outside_experiment = pd.DataFrame({
       'date': ['2020-10-10', '2020-10-10'],
       'geo': [9, 10],
       'response': [10, 11],
       'cost': [1.0, 2.0],
       'pair': [100, 101],
       'assignment': [-1, -1],
   })
   # DataFrame.append was deprecated in pandas 1.4 and removed in 2.0.
   # pd.concat with its default arguments stacks the rows the same way and
   # works on both old and new pandas versions.
   temp_df = pd.concat([self.df.copy(), outside_experiment])
   geox_data = trimmed_match_post_analysis.check_input_data(temp_df)
   # Expected output holds geos 1-4 only; the two added geos are absent.
   expected_df = pd.DataFrame({
       'date': ['2020-10-09', '2020-10-10', '2020-10-11'] * 4,
       'geo': [1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4],
       'pair': [1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2],
       'assignment': [0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1],
       'response': [10, 10, 10, 20, 20.0, 20, 30, 30, 30, 40, 40, 40],
       'cost': [1.0, 1.0, 1.0, 2.0, 2.0, 2.0, 3.0, 3.0, 3.0, 4.0, 4.0, 4.0],
   }).sort_values(by=['date', 'geo']).reset_index(drop=True)
   self.assertTrue(geox_data.reset_index(drop=True).equals(expected_df))