def test_set_up_deletes_in_not_append_mode(self):
    # Create an individual-type CSV (named Family.csv)
    target_file_path = Mapper.get_target_file_path(TargetCSVType.FAMILY,
                                                   self.test_file_path)
    csvops.create_file(target_file_path, TargetCSVType.INDIVIDUAL)
    Mapper.set_up(target_file_path, TargetCSVType.FAMILY, Mode.Mode.CREATE)
    # Ensure the old individual CSV was deleted and replaced with a file
    # carrying Family CSV headers
    self.assertTrue(
        csvops.check_headers_match(target_file_path, TargetCSVType.FAMILY))

def test_set_up_does_not_delete_in_append_mode(self):
    # Create an individual-type CSV (named Family.csv)
    target_file_path = Mapper.get_target_file_path(TargetCSVType.FAMILY,
                                                   self.test_file_path)
    csvops.create_file(target_file_path, TargetCSVType.INDIVIDUAL)
    # set_up should not delete the file; if it did, the file would no
    # longer have INDIVIDUAL headers
    Mapper.set_up(target_file_path, TargetCSVType.FAMILY, Mode.Mode.APPEND)
    # Ensure the old individual CSV was not deleted
    self.assertTrue(
        csvops.check_headers_match(target_file_path,
                                   TargetCSVType.INDIVIDUAL))

def test_batch_ids_are_all_unique(self):
    df = csvops.read_file_without_check(THIS_DIR + TEST_DATA_FILENAME)
    self.fb.map(df, None)
    batch_data = self.fb.map(df, None)
    unique_ids_size = batch_data['BatchID'].value_counts(dropna=True).size
    ids_size = batch_data['BatchID'].values.size
    self.assertEqual(ids_size, unique_ids_size)

def test_ids_are_unique(self):
    df = csvops.read_file_without_check(THIS_DIR + TEST_DATA_FILENAME)
    attribute_notes = self.anb.map(df, SourceCSVType.ATTRIBUTE_NOTES)
    unique_ids_size = attribute_notes['note_id'].value_counts(
        dropna=True).size
    ids_size = attribute_notes['note_id'].values.size
    self.assertEqual(ids_size, unique_ids_size)

def test_all_contribution_ids_are_unique(self):
    df = csvops.read_file_without_check(THIS_DIR + TEST_DATA_FILENAME)
    contributions_data = self.fb.build_contributions(df)
    unique_ids_size = contributions_data['ContributionID'].value_counts(
        dropna=True).size
    ids_size = contributions_data['ContributionID'].values.size
    self.assertEqual(ids_size, unique_ids_size)

def test_data_is_not_null(self):
    df = csvops.read_file_without_check(THIS_DIR + TEST_DATA_FILENAME)
    attribute_notes = self.anb.map(df, SourceCSVType.ATTRIBUTE_NOTES)
    # Fail if any of the required columns contains a null
    self.assertFalse(
        pd.isnull(attribute_notes['individual_id_1']).any() or
        pd.isnull(attribute_notes['attribute_group_name']).any() or
        pd.isnull(attribute_notes['attribute_name']).any() or
        pd.isnull(attribute_notes['comment']).any())

def test_batch_shared_data_columns_are_correct_types(self):
    df = csvops.read_file_without_check(THIS_DIR + TEST_DATA_FILENAME)
    self.fb.map(df, None)
    # Comparing a column dtype to the Timestamp class is always False;
    # check the elements themselves instead
    batch_date_is_date = self.fb.batch_data['Batch_Date'].apply(
        lambda v: isinstance(v, Timestamp)).all()
    batch_id_is_int = (self.fb.batch_data['Id'].dtype == np.int64
                       or self.fb.batch_data['Id'].dtype == int)
    self.assertTrue(batch_date_is_date and batch_id_is_int)

def test_number_of_batches_equals_number_of_group_by_batch_name_date(self):
    df = csvops.read_file_without_check(THIS_DIR + TEST_DATA_FILENAME)
    self.fb.map(df, None)
    batch_data = self.fb.map(df, None)
    ids_size = batch_data['BatchID'].values.size
    # groupby(...).size() yields one row per (BatchName, BatchDate) group,
    # so its .size is the number of distinct batches
    group_by_size = batch_data.groupby(['BatchName',
                                        'BatchDate']).size().size
    self.assertEqual(ids_size, group_by_size)

def run(source_file_path, target_file_type: TargetCSVType,
        source_type: SourceCSVType):
    mode = source_type.mode
    set_up(source_file_path, target_file_type, mode)
    data = CSVOperations.read_file_without_check(source_file_path)
    builder = BuilderFactory.get_builder(target_file_type, source_type)
    if builder is None:
        raise Exception('No matching builder was found')
    output_data = builder.map(data, source_type)
    if output_data is None:
        return
    target_file_path = get_target_file_path(target_file_type,
                                            source_file_path)
    if mode == Mode.APPEND:
        CSVOperations.delete_write(target_file_path, output_data)
    else:
        CSVOperations.write_file(target_file_path, output_data)

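# A minimal invocation sketch for run(); 'export.csv' is a hypothetical
# path, and whether this particular source/target pairing has a registered
# builder depends on BuilderFactory's configuration.
if __name__ == '__main__':
    run('export.csv', TargetCSVType.INDIVIDUAL,
        SourceCSVType.ATTRIBUTE_NOTES)
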
def test_contributions_ids_are_ints(self):
    df = csvops.read_file_without_check(THIS_DIR + TEST_DATA_FILENAME)
    contributions_data = self.fb.build_contributions(df)
    contribution_id_is_int = contributions_data[
        'ContributionID'].dtype == int
    contribution_batch_id_is_int = contributions_data[
        'ContributionBatchID'].dtype == int
    contribution_individual_id_is_int = contributions_data[
        'IndividualID'].dtype == int
    self.assertTrue(contribution_id_is_int
                    and contribution_batch_id_is_int
                    and contribution_individual_id_is_int)

def test_number_of_shared_batch_data_equals_number_of_contributions_group_by_batch_name_date(
        self):
    df = csvops.read_file_without_check(THIS_DIR + TEST_DATA_FILENAME)
    cloned_df = df.copy()
    cloned_df = self.fb.fill_missing_batch_data(cloned_df)
    self.fb.map(df, None)
    ids_size = self.fb.batch_data['Id'].values.size
    # groupby(...).size() yields one row per (Batch_Name, Batch_Date)
    # group, so its .size is the number of distinct batches
    group_by_size = cloned_df.groupby(['Batch_Name',
                                       'Batch_Date']).size().size
    self.assertEqual(ids_size, group_by_size)

def set_up(source_file_path, target_file_type, mode):
    target_file_path = get_target_file_path(target_file_type,
                                            source_file_path)
    if not CSVOperations.check_file_exists(source_file_path):
        raise MappingFileNotFound(
            'Mapping File Not Found at ' + source_file_path,
            source_file_path)
    # Any non-append run starts from a fresh target file
    if CSVOperations.check_file_exists(
            target_file_path) and mode != Mode.APPEND:
        CSVOperations.delete_file(target_file_path)
    if not CSVOperations.check_file_exists(target_file_path):
        CSVOperations.create_file(target_file_path, target_file_type)

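# Behavior sketch for set_up(), using hypothetical paths; Mode.CREATE
# stands in for any non-APPEND mode:
#   set_up('missing.csv', TargetCSVType.FAMILY, Mode.CREATE)
#       -> raises MappingFileNotFound
#   set_up('source.csv', TargetCSVType.FAMILY, Mode.CREATE)
#       -> deletes a stale target, then recreates it with FAMILY headers
#   set_up('source.csv', TargetCSVType.FAMILY, Mode.APPEND)
#       -> leaves an existing target file untouched
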
def test_contributions_amount_equals_contribution_amount(self):
    df = csvops.read_file_without_check(THIS_DIR + TEST_DATA_FILENAME)
    contributions_data = self.fb.build_contributions(df)
    self.assertEqual(contributions_data['Amount'].sum(), 6084.99)

def test_all_contributions_received_dates_are_dates(self):
    df = csvops.read_file_without_check(THIS_DIR + TEST_DATA_FILENAME)
    contributions_data = self.fb.build_contributions(df)
    # Check the elements rather than comparing the dtype to the
    # Timestamp class, which is always False
    self.assertTrue(contributions_data['ReceivedDate'].apply(
        lambda v: isinstance(v, Timestamp)).all())

def test_all_check_contributions_have_check_number(self):
    df = csvops.read_file_without_check(THIS_DIR + TEST_DATA_FILENAME)
    contributions_data = self.fb.build_contributions(df)
    check_contributions = contributions_data.loc[
        contributions_data['ContributionTypeName'] == 'Check']
    self.assertFalse(pd.isnull(check_contributions['CheckNumber']).any())

def test_batch_columns_match(self):
    df = csvops.read_file_without_check(THIS_DIR + TEST_DATA_FILENAME)
    self.fb.map(df, None)
    batch_data = self.fb.map(df, None)
    npt.assert_array_equal(batch_data.columns.values,
                           TargetCSVType.BATCH.columns)

def test_all_contributions_have_funds(self):
    df = csvops.read_file_without_check(THIS_DIR + TEST_DATA_FILENAME)
    contributions_data = self.fb.build_contributions(df)
    self.assertFalse(pd.isnull(contributions_data['FundName']).any())

def test_batch_amounts_are_all_decimal(self):
    df = csvops.read_file_without_check(THIS_DIR + TEST_DATA_FILENAME)
    self.fb.map(df, None)
    batch_data = self.fb.map(df, None)
    # A column of Decimal objects has object dtype, so compare the
    # elements rather than the dtype
    self.assertTrue(batch_data['BatchAmount'].apply(
        lambda v: isinstance(v, decimal.Decimal)).all())

def test_read_attribute_data_has_correct_columns(self):
    df = csvops.read_file_without_check(
        THIS_DIR + '/testdata/A2501E_ConnectionStepsAttributes.csv')
    npt.assert_array_equal(df.columns.values,
                           SourceCSVType.ATTRIBUTES.columns)

def test_batches_all_have_amounts(self):
    df = csvops.read_file_without_check(THIS_DIR + TEST_DATA_FILENAME)
    self.fb.map(df, None)
    batch_data = self.fb.map(df, None)
    self.assertFalse(pd.isnull(batch_data['BatchAmount']).any())

def test_batch_dates_are_all_timestamps(self):
    df = csvops.read_file_without_check(THIS_DIR + TEST_DATA_FILENAME)
    self.fb.map(df, None)
    batch_data = self.fb.map(df, None)
    # Check the elements rather than comparing the dtype to the
    # Timestamp class, which is always False
    self.assertTrue(batch_data['BatchDate'].apply(
        lambda v: isinstance(v, Timestamp)).all())

def test_date_parse_mmddyy(self):
    # Two-digit years are expected to pivot into the 1900s
    correct_date = datetime.date(1957, 2, 23)
    self.assertEqual(correct_date, csvops.parse_date("02/23/57"))

def test_all_contribution_batch_ids_have_batches(self):
    df = csvops.read_file_without_check(THIS_DIR + TEST_DATA_FILENAME)
    contributions_data = self.fb.build_contributions(df)
    npt.assert_array_equal(
        contributions_data['ContributionBatchID'].unique(),
        self.fb.batch_data['Id'])

def test_contributions_columns_match(self):
    df = csvops.read_file_without_check(THIS_DIR + TEST_DATA_FILENAME)
    contributions_data = self.fb.build_contributions(df)
    npt.assert_array_equal(contributions_data.columns.values,
                           TargetCSVType.CONTRIBUTION.columns)

def test_read_individual_household_has_correct_columns(self):
    df = csvops.read_file_without_check(
        THIS_DIR + '/testdata/X9400_no_attributes.csv')
    npt.assert_array_equal(df.columns.values,
                           SourceCSVType.INDIVIDUAL_HOUSEHOLD.columns)

def test_batch_shared_data_columns_are_all_populated(self):
    df = csvops.read_file_without_check(THIS_DIR + TEST_DATA_FILENAME)
    self.fb.map(df, None)
    self.assertFalse(self.fb.batch_data.isnull().values.any())

def test_batch_shared_data_contains_correct_number_of_batches(self):
    df = csvops.read_file_without_check(THIS_DIR + TEST_DATA_FILENAME)
    self.fb.map(df, None)
    unique_size = self.fb.batch_data['Id'].value_counts(dropna=True).size
    self.assertEqual(unique_size, 5)

def test_number_of_contributions(self):
    df = csvops.read_file_without_check(THIS_DIR + TEST_DATA_FILENAME)
    contributions_data = self.fb.build_contributions(df)
    self.assertEqual(len(contributions_data.index), 36)

def test_batches_ids_are_all_ints(self):
    df = csvops.read_file_without_check(THIS_DIR + TEST_DATA_FILENAME)
    self.fb.map(df, None)
    batch_data = self.fb.map(df, None)
    self.assertTrue(batch_data['BatchID'].dtype == int)

def test_date_parse_mmddyyyy(self):
    correct_date = datetime.date(2001, 12, 31)
    self.assertEqual(correct_date, csvops.parse_date("12/31/2001"))