def test_set_up_deletes_in_not_append_mode(self):
    # Create an individual-type CSV (named Family.csv)
    target_file_path = Mapper.get_target_file_path(TargetCSVType.FAMILY,
                                                   self.test_file_path)
    csvops.create_file(target_file_path, TargetCSVType.INDIVIDUAL)
    Mapper.set_up(target_file_path, TargetCSVType.FAMILY, Mode.Mode.CREATE)
    # Ensure the old individual CSV was deleted and replaced with a file
    # carrying Family CSV headers
    self.assertTrue(
        csvops.check_headers_match(target_file_path, TargetCSVType.FAMILY))

def test_set_up_does_not_delete_in_append_mode(self):
    # Create an individual-type CSV (named Family.csv)
    target_file_path = Mapper.get_target_file_path(TargetCSVType.FAMILY,
                                                   self.test_file_path)
    csvops.create_file(target_file_path, TargetCSVType.INDIVIDUAL)
    # set_up should not delete the file; if it did, the file would no
    # longer have INDIVIDUAL headers
    Mapper.set_up(target_file_path, TargetCSVType.FAMILY, Mode.Mode.APPEND)
    # Ensure the old individual CSV was not deleted
    self.assertTrue(
        csvops.check_headers_match(target_file_path,
                                   TargetCSVType.INDIVIDUAL))

def test_batch_ids_are_all_unique(self):
    df = csvops.read_file_without_check(THIS_DIR + TEST_DATA_FILENAME)
    self.fb.map(df, None)
    batch_data = self.fb.map(df, None)
    unique_ids_size = batch_data['BatchID'].value_counts(dropna=True).size
    ids_size = batch_data['BatchID'].values.size
    self.assertEqual(ids_size, unique_ids_size)

def test_ids_are_unique(self):
    df = csvops.read_file_without_check(THIS_DIR + TEST_DATA_FILENAME)
    attribute_notes = self.anb.map(df, SourceCSVType.ATTRIBUTE_NOTES)
    unique_ids_size = attribute_notes['note_id'].value_counts(
        dropna=True).size
    ids_size = attribute_notes['note_id'].values.size
    self.assertEqual(ids_size, unique_ids_size)

def test_all_contribution_ids_are_unique(self):
    df = csvops.read_file_without_check(THIS_DIR + TEST_DATA_FILENAME)
    contributions_data = self.fb.build_contributions(df)
    unique_ids_size = contributions_data['ContributionID'].value_counts(
        dropna=True).size
    ids_size = contributions_data['ContributionID'].values.size
    self.assertEqual(ids_size, unique_ids_size)

def test_data_is_not_null(self):
    df = csvops.read_file_without_check(THIS_DIR + TEST_DATA_FILENAME)
    attribute_notes = self.anb.map(df, SourceCSVType.ATTRIBUTE_NOTES)
    # Fail if any of the required columns contains a null
    self.assertFalse(
        pd.isnull(attribute_notes['individual_id_1']).any() or
        pd.isnull(attribute_notes['attribute_group_name']).any() or
        pd.isnull(attribute_notes['attribute_name']).any() or
        pd.isnull(attribute_notes['comment']).any())

def test_batch_shared_data_columns_are_correct_types(self):
    df = csvops.read_file_without_check(THIS_DIR + TEST_DATA_FILENAME)
    self.fb.map(df, None)
    # Comparing a column dtype to the Timestamp class is always False;
    # check the elements themselves instead
    batch_date_is_date = self.fb.batch_data['Batch_Date'].apply(
        lambda v: isinstance(v, Timestamp)).all()
    batch_id_is_int = (self.fb.batch_data['Id'].dtype == np.int64
                       or self.fb.batch_data['Id'].dtype == int)
    self.assertTrue(batch_date_is_date and batch_id_is_int)

def test_number_of_batches_equals_number_of_group_by_batch_name_date(self):
    df = csvops.read_file_without_check(THIS_DIR + TEST_DATA_FILENAME)
    self.fb.map(df, None)
    batch_data = self.fb.map(df, None)
    ids_size = batch_data['BatchID'].values.size
    # groupby(...).size() yields one row per (BatchName, BatchDate) group,
    # so its .size is the number of distinct batches
    group_by_size = batch_data.groupby(['BatchName',
                                        'BatchDate']).size().size
    self.assertEqual(ids_size, group_by_size)

def run(source_file_path, target_file_type: TargetCSVType,
        source_type: SourceCSVType):
    mode = source_type.mode
    set_up(source_file_path, target_file_type, mode)
    data = CSVOperations.read_file_without_check(source_file_path)
    builder = BuilderFactory.get_builder(target_file_type, source_type)
    if builder is None:
        raise Exception('No matching builder was found')
    output_data = builder.map(data, source_type)
    if output_data is None:
        return
    target_file_path = get_target_file_path(target_file_type,
                                            source_file_path)
    if mode == Mode.APPEND:
        CSVOperations.delete_write(target_file_path, output_data)
    else:
        CSVOperations.write_file(target_file_path, output_data)

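# A minimal invocation sketch for run(); 'export.csv' is a hypothetical
# path, and whether this particular source/target pairing has a registered
# builder depends on BuilderFactory's configuration.
if __name__ == '__main__':
    run('export.csv', TargetCSVType.INDIVIDUAL,
        SourceCSVType.ATTRIBUTE_NOTES)
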
def test_contributions_ids_are_ints(self):
    df = csvops.read_file_without_check(THIS_DIR + TEST_DATA_FILENAME)
    contributions_data = self.fb.build_contributions(df)
    contribution_id_is_int = contributions_data[
        'ContributionID'].dtype == int
    contribution_batch_id_is_int = contributions_data[
        'ContributionBatchID'].dtype == int
    contribution_individual_id_is_int = contributions_data[
        'IndividualID'].dtype == int
    self.assertTrue(contribution_id_is_int
                    and contribution_batch_id_is_int
                    and contribution_individual_id_is_int)

def test_number_of_shared_batch_data_equals_number_of_contributions_group_by_batch_name_date(
        self):
    df = csvops.read_file_without_check(THIS_DIR + TEST_DATA_FILENAME)
    cloned_df = df.copy()
    cloned_df = self.fb.fill_missing_batch_data(cloned_df)
    self.fb.map(df, None)
    ids_size = self.fb.batch_data['Id'].values.size
    # groupby(...).size() yields one row per (Batch_Name, Batch_Date)
    # group, so its .size is the number of distinct batches
    group_by_size = cloned_df.groupby(['Batch_Name',
                                       'Batch_Date']).size().size
    self.assertEqual(ids_size, group_by_size)

def set_up(source_file_path, target_file_type, mode):
    target_file_path = get_target_file_path(target_file_type,
                                            source_file_path)
    if not CSVOperations.check_file_exists(source_file_path):
        raise MappingFileNotFound(
            'Mapping File Not Found at ' + source_file_path,
            source_file_path)
    # Any non-append run starts from a fresh target file
    if CSVOperations.check_file_exists(
            target_file_path) and mode != Mode.APPEND:
        CSVOperations.delete_file(target_file_path)
    if not CSVOperations.check_file_exists(target_file_path):
        CSVOperations.create_file(target_file_path, target_file_type)

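# Behavior sketch for set_up(), using hypothetical paths; Mode.CREATE
# stands in for any non-APPEND mode:
#   set_up('missing.csv', TargetCSVType.FAMILY, Mode.CREATE)
#       -> raises MappingFileNotFound
#   set_up('source.csv', TargetCSVType.FAMILY, Mode.CREATE)
#       -> deletes a stale target, then recreates it with FAMILY headers
#   set_up('source.csv', TargetCSVType.FAMILY, Mode.APPEND)
#       -> leaves an existing target file untouched
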
def test_contributions_amount_equals_contribution_amount(self):
    df = csvops.read_file_without_check(THIS_DIR + TEST_DATA_FILENAME)
    contributions_data = self.fb.build_contributions(df)
    self.assertEqual(contributions_data['Amount'].sum(), 6084.99)

def test_all_contributions_received_dates_are_dates(self):
    df = csvops.read_file_without_check(THIS_DIR + TEST_DATA_FILENAME)
    contributions_data = self.fb.build_contributions(df)
    # Check the elements rather than comparing the dtype to the
    # Timestamp class, which is always False
    self.assertTrue(contributions_data['ReceivedDate'].apply(
        lambda v: isinstance(v, Timestamp)).all())

def test_all_check_contributions_have_check_number(self):
    df = csvops.read_file_without_check(THIS_DIR + TEST_DATA_FILENAME)
    contributions_data = self.fb.build_contributions(df)
    check_contributions = contributions_data.loc[
        contributions_data['ContributionTypeName'] == 'Check']
    self.assertFalse(pd.isnull(check_contributions['CheckNumber']).any())

def test_batch_columns_match(self):
    df = csvops.read_file_without_check(THIS_DIR + TEST_DATA_FILENAME)
    self.fb.map(df, None)
    batch_data = self.fb.map(df, None)
    npt.assert_array_equal(batch_data.columns.values,
                           TargetCSVType.BATCH.columns)

def test_all_contributions_have_funds(self):
    df = csvops.read_file_without_check(THIS_DIR + TEST_DATA_FILENAME)
    contributions_data = self.fb.build_contributions(df)
    self.assertFalse(pd.isnull(contributions_data['FundName']).any())

def test_batch_amounts_are_all_decimal(self):
    df = csvops.read_file_without_check(THIS_DIR + TEST_DATA_FILENAME)
    self.fb.map(df, None)
    batch_data = self.fb.map(df, None)
    # A column of Decimal objects has object dtype, so compare the
    # elements rather than the dtype
    self.assertTrue(batch_data['BatchAmount'].apply(
        lambda v: isinstance(v, decimal.Decimal)).all())

def test_read_attribute_data_has_correct_columns(self):
    df = csvops.read_file_without_check(
        THIS_DIR + '/testdata/A2501E_ConnectionStepsAttributes.csv')
    npt.assert_array_equal(df.columns.values,
                           SourceCSVType.ATTRIBUTES.columns)

def test_batches_all_have_amounts(self):
    df = csvops.read_file_without_check(THIS_DIR + TEST_DATA_FILENAME)
    self.fb.map(df, None)
    batch_data = self.fb.map(df, None)
    self.assertFalse(pd.isnull(batch_data['BatchAmount']).any())

def test_batch_dates_are_all_timestamps(self):
    df = csvops.read_file_without_check(THIS_DIR + TEST_DATA_FILENAME)
    self.fb.map(df, None)
    batch_data = self.fb.map(df, None)
    # Check the elements rather than comparing the dtype to the
    # Timestamp class, which is always False
    self.assertTrue(batch_data['BatchDate'].apply(
        lambda v: isinstance(v, Timestamp)).all())

def test_date_parse_mmddyy(self):
    # Two-digit years are expected to pivot into the 1900s
    correct_date = datetime.date(1957, 2, 23)
    self.assertEqual(correct_date, csvops.parse_date("02/23/57"))

def test_all_contribution_batch_ids_have_batches(self):
    df = csvops.read_file_without_check(THIS_DIR + TEST_DATA_FILENAME)
    contributions_data = self.fb.build_contributions(df)
    npt.assert_array_equal(
        contributions_data['ContributionBatchID'].unique(),
        self.fb.batch_data['Id'])

def test_contributions_columns_match(self):
    df = csvops.read_file_without_check(THIS_DIR + TEST_DATA_FILENAME)
    contributions_data = self.fb.build_contributions(df)
    npt.assert_array_equal(contributions_data.columns.values,
                           TargetCSVType.CONTRIBUTION.columns)

def test_read_individual_household_has_correct_columns(self):
    df = csvops.read_file_without_check(
        THIS_DIR + '/testdata/X9400_no_attributes.csv')
    npt.assert_array_equal(df.columns.values,
                           SourceCSVType.INDIVIDUAL_HOUSEHOLD.columns)

def test_batch_shared_data_columns_are_all_populated(self):
    df = csvops.read_file_without_check(THIS_DIR + TEST_DATA_FILENAME)
    self.fb.map(df, None)
    self.assertFalse(self.fb.batch_data.isnull().values.any())

def test_batch_shared_data_contains_correct_number_of_batches(self):
    df = csvops.read_file_without_check(THIS_DIR + TEST_DATA_FILENAME)
    self.fb.map(df, None)
    unique_size = self.fb.batch_data['Id'].value_counts(dropna=True).size
    self.assertEqual(unique_size, 5)

def test_number_of_contributions(self):
    df = csvops.read_file_without_check(THIS_DIR + TEST_DATA_FILENAME)
    contributions_data = self.fb.build_contributions(df)
    self.assertEqual(len(contributions_data.index), 36)

def test_batches_ids_are_all_ints(self):
    df = csvops.read_file_without_check(THIS_DIR + TEST_DATA_FILENAME)
    self.fb.map(df, None)
    batch_data = self.fb.map(df, None)
    self.assertTrue(batch_data['BatchID'].dtype == int)

def test_date_parse_mmddyyyy(self):
    correct_date = datetime.date(2001, 12, 31)
    self.assertEqual(correct_date, csvops.parse_date("12/31/2001"))