def test_set_map_column_number(self):
    """A header holding two ``mapped`` entries must yield a mapped-column count of 2."""
    # (column_name, column_location, column_type) for every header entry.
    column_specs = [
        ("id", 1, ""),
        ("first_name", 2, "mapped"),
        ("last_name", 3, "duplicate"),
        ("last_name", 4, "duplicate"),
        ("requirement", 5, "mapped"),
        ("alternate_name", 6, ""),
    ]

    comparable = Compare(order=1)
    comparable.header = [
        {"column_name": name, "column_location": location, "column_type": kind}
        for name, location, kind in column_specs
    ]
    comparable.index_column_name = [
        {"column_name": "id", "column_location": 1, "column_type": ""},
    ]

    header_validator.set_map_column_number(comparable)

    assert 2 == comparable.number_of_mapped_columns
def test_extract_not_checked_column(self):
    """The two ``not_checked`` columns must end up in ``comparable.not_checked_column``."""
    # (column_name, column_location, column_type) for every header entry.
    column_specs = [
        ("id", 1, ""),
        ("first_name", 2, ""),
        ("last_name", 3, "not_checked"),
        # NOTE(review): location 3 repeats last_name's location; it looks
        # like a copy/paste slip (4 would match the frame) - confirm.
        ("middle_name", 3, "not_checked"),
        ("requirement", 5, "mapped"),
        ("alternate_name", 6, ""),
    ]
    frame_content = {
        'id': [1, 2, 3],
        'first_name': ['f1', 'f2', 'f3'],
        'last_name': ['l1', 'l2', 'l3'],
        'middle_name': ['m1', 'm2', 'm3'],
        'requirement': ['r1', 'r2', 'r3'],
        'alternate_name': ['a1', 'a2', 'a3'],
    }

    comparable = Compare()
    comparable.header = [
        {"column_name": name, "column_location": location, "column_type": kind}
        for name, location, kind in column_specs
    ]
    comparable.index_column_name = [{"column_name": "id", "column_location": 1}]
    comparable.data_frame = pd.DataFrame(frame_content)

    data_importer.extract_not_checked_column(comparable)

    expected = pd.DataFrame({
        'last_name': ['l1', 'l2', 'l3'],
        'middle_name': ['m1', 'm2', 'm3'],
    })
    assert expected.equals(comparable.not_checked_column)
def test_format_index_column(self):
    """The header entry matching the index column must be typed ``index`` afterwards."""
    # (column_name, column_location, column_type) for every header entry.
    column_specs = [
        ("id", 1, ""),
        ("first_name", 2, "mapped"),
        ("last_name", 3, "duplicate"),
        ("last_name", 4, "duplicate"),
        ("requirement", 5, "mapped"),
        ("alternate_name", 6, ""),
    ]

    comparable = Compare(order=0)
    comparable.header = [
        {"column_name": name, "column_location": location, "column_type": kind}
        for name, location, kind in column_specs
    ]
    comparable.index_column_name = [
        {"column_name": "id", "column_location": 1, "column_type": ""},
    ]

    header_validator.format_index_column(comparable)

    first_entry = comparable.header[0]
    assert first_entry['column_type'] == "index"
def test_set_start_end_index_column(self):
    """With start column 1 and one index column, start and end must both be 1."""
    comparable = Compare(order=0)
    comparable.start_column = 1
    comparable.number_of_index_column = 1

    header_validator.set_start_end_index_column(comparable)

    assert comparable.index_column_start == 1
    assert comparable.index_column_end == 1
def test_check_for_delimiter(self):
    """``check_for_delimiter`` must return ``None`` for the simple CSV mock file."""
    comparable = Compare()
    comparable.file_name = os.path.join(self.MOCK_DATA_DIR,
                                        "simple_csv_file.csv")
    comparable.encoding = "utf-8"
    comparable.delimiter = ","

    outcome = input_validator.check_for_delimiter(comparable)

    assert outcome is None
def test_check_for_input_file_existence_when_file_exist(self, mock_isfile):
    """With ``mock_isfile`` reporting True the existence check must return ``None``."""
    mock_isfile.return_value = True

    comparator_obj = Compare()
    comparator_obj.file_name = \
        mock_input_dict.parameter_for_input_file_existence["file_1_name"]

    outcome = input_validator.check_for_input_file_existence(comparator_obj)

    assert outcome is None
def test_set_map_column_location(self):
    """After the call, each header entry must equal the first side of its mapping pair."""
    # NOTE: this rebinds the class attribute, so the value stays visible to
    # whatever runs after this test.
    Compare.map_columns = [
        ({"column_name": name}, {"column_name": name})
        for name in ("first_name", "last_name")
    ]

    comparable = Compare(order=0)
    comparable.header = [
        {"column_name": "first_name", "column_location": 2},
        {"column_name": "last_name", "column_location": 3},
    ]

    header_validator.set_map_column_location(comparable)

    for position in (0, 1):
        assert comparable.header[position] == Compare.map_columns[position][0]
def test_stringify_index(self, mock_df, mock_index):
    """Every value of the ``id`` column must be a ``str`` after ``stringify_index``."""
    comparable = Compare()
    comparable.data_frame = mock_df
    comparable.index_column_name = mock_index

    index_validator.stringify_index(comparable)

    id_values = comparable.data_frame['id']
    assert all(isinstance(val, str) for val in id_values)
def test_set_map_column_location_when_not_exist(self):
    """Mapping names that are absent from the header must raise ``AppErrorHandler``."""
    # NOTE: this rebinds the class attribute, so the value stays visible to
    # whatever runs after this test.
    Compare.map_columns = [
        ({"column_name": name}, {"column_name": name})
        for name in ("first_name", "last_name")
    ]

    comparable = Compare(order=0)
    # Header names deliberately differ from the mapped names above.
    comparable.header = [
        {"column_name": "first_name1", "column_location": 2},
        {"column_name": "last_name1", "column_location": 3},
    ]

    with pytest.raises(AppErrorHandler):
        header_validator.set_map_column_location(comparable)
def test_check_for_non_comma_delimiter(self):
    """``check_for_delimiter`` must raise ``AppErrorHandler`` for ``none_csv_file.txt``."""
    mock_file = os.path.join(self.MOCK_DATA_DIR, "none_csv_file.txt")

    comparable = Compare()
    comparable.file_name = mock_file
    comparable.encoding = "utf-8"
    comparable.delimiter = ","

    with pytest.raises(AppErrorHandler):
        input_validator.check_for_delimiter(comparable)
def test_freeze_pandas_index(self):
    """The frame handed over must gain a column holding its positions 0, 1, 2."""
    source_frame = pd.DataFrame({'a': ['x', 'y', 'z']})

    comparable = Compare()
    comparable.original_data_frame = source_frame

    data_importer.freeze_pandas_index(comparable)

    expected = pd.DataFrame({
        'a': ['x', 'y', 'z'],
        Field.pandas_original_index.value: [0, 1, 2],
    })
    # The check is made on the local frame object, not on the attribute, so
    # it only passes when the frame passed in is changed in place.
    assert source_frame.equals(expected)
def test_strip_index(self, mock_df, mock_index):
    """The ``id`` column must read ``[1, 2, 'str', 'abc', 'abc']`` after ``strip_index``."""
    comparable = Compare()
    comparable.data_frame = mock_df
    comparable.index_column_name = mock_index

    index_validator.strip_index(comparable)

    actual = list(comparable.data_frame["id"])
    assert [1, 2, 'str', 'abc', 'abc'] == actual
def test_sort_index(self, mock_index):
    """The ``id`` values must come back ordered ``1, 2, abc, abc, str``."""
    unsorted_ids = {'id': ['1', '2', 'str', 'abc', 'abc']}

    comparable = Compare()
    comparable.index_column_name = mock_index
    comparable.data_frame = pd.DataFrame(data=unsorted_ids, dtype="object")

    index_validator.sort_index(comparable)

    actual = list(comparable.data_frame['id'])
    assert ['1', '2', 'abc', 'abc', 'str'] == actual
def test_check_for_duplicate_index(self, mock_index):
    """Both occurrences of ``dup1`` must be reported in ``duplicate_index``."""
    frame_content = {'id': ['dup1', '1', '2', 'dup1', '3', '4']}

    comparable = Compare()
    comparable.index_column_name = mock_index
    comparable.data_frame = pd.DataFrame(data=frame_content, dtype="object")

    index_validator.check_for_duplicate_index(comparable)

    reported = list(comparable.duplicate_index['id'].values)
    assert ['dup1', 'dup1'] == reported
def test_check_for_empty_index(self, mock_index):
    """Rows whose ``id`` is ``''``, ``None`` or NaN must be listed in ``empty_index``."""
    frame_content = {
        'id': ['', '', 'str', 'abc', 'abc', '', None, np.nan],
    }

    comparable = Compare()
    comparable.index_column_name = mock_index
    comparable.data_frame = pd.DataFrame(data=frame_content, dtype="object")

    index_validator.check_for_empty_index(comparable)

    # Positions of the '', None and NaN entries in the column above.
    assert [0, 1, 5, 6, 7] == comparable.empty_index
def test_check_for_output_file_write_access_when_file_is_not_writeable(
        self, mock_os_access):
    """A non-writeable file must make the write-access check raise ``AppErrorHandler``.

    ``mock_os_access`` is configured to return ``False``; the test then
    verifies both that the validator raises and that the mock was
    consulted exactly once.
    """
    comparator_obj = Compare()
    comparator_obj.file_name = \
        mock_input_dict.parameter_for_input_file_existence["file_1_name"]
    mock_os_access.return_value = False

    # Keep only the raising call inside the context manager: statements
    # placed after the raising line inside the block are never executed.
    with pytest.raises(AppErrorHandler):
        input_validator.check_for_file_write_access(comparator_obj)

    # The previous assertion followed the raising call and compared against
    # one formatted string ("os.access(<obj>, os.W_OK)"), which is not an
    # argument list the mock could plausibly receive. Check the call count
    # instead, outside the block so the check actually runs.
    mock_os_access.assert_called_once()
def test_get_file_encoding_for_failing_encoding_cases(self):
    """``set_file_encoding`` must raise ``AppErrorHandler`` for every listed encoding file."""
    failing_encoding_names = self.create_mock_data_file_for_encoding_types(
        "failing_encoding_names.csv")
    comparator_obj = Compare()

    for encoding_val in failing_encoding_names:
        # One mock file per encoding name, all stored in the mock data folder.
        comparator_obj.file_name = os.path.join(self.MOCK_DATA_DIR,
                                                f'{encoding_val}.csv')
        with pytest.raises(AppErrorHandler):
            input_validator.set_file_encoding(comparator_obj)
def test_drop_empty_index(self, mock_index):
    """Rows listed in ``empty_index`` must be gone from the frame after the call."""
    frame_content = {'id': ['', '', 'str', 'abc', 'abc', '', None]}

    comparable = Compare()
    comparable.index_column_name = mock_index
    comparable.data_frame = pd.DataFrame(data=frame_content, dtype="object")
    # Positions of the '' and None entries in the column above.
    comparable.empty_index = [0, 1, 5, 6]

    index_validator.drop_empty_index(comparable)

    remaining = list(comparable.data_frame["id"])
    assert ['str', 'abc', 'abc'] == remaining
def test_check_for_input_file_existence_when_file_does_not_exist(
        self, mock_isfile):
    """A missing input file must make the existence check raise ``AppErrorHandler``.

    ``mock_isfile`` is configured to return ``False``; the test then
    verifies both that the validator raises and that the mock was
    consulted exactly once.
    """
    comparator_obj = Compare()
    comparator_obj.file_name = \
        mock_input_dict.parameter_for_input_file_existence["file_1_name"]
    mock_isfile.return_value = False

    # Keep only the raising call inside the context manager: statements
    # placed after the raising line inside the block are never executed.
    with pytest.raises(AppErrorHandler):
        input_validator.check_for_input_file_existence(comparator_obj)

    # The previous assertion followed the raising call and compared against
    # one formatted string ("os.path.isfile(<obj>)"), which is not an
    # argument list the mock could plausibly receive. Check the call count
    # instead, outside the block so the check actually runs.
    mock_isfile.assert_called_once()
def set_start_end_checked_column(comparable: Compare):
    """Set ``checked_column_start`` / ``checked_column_end`` on ``comparable``.

    The checked range covers the regular and mapped columns together and
    begins right after the index columns. Nothing is written when there
    are no regular or mapped columns.
    """
    checked_width = (comparable.number_of_regular_columns
                     + comparable.number_of_mapped_columns)
    if checked_width <= 0:
        return

    first = comparable.start_column + comparable.number_of_index_column
    comparable.checked_column_start = first
    # Inclusive end: a range of width 1 starts and ends on the same column.
    comparable.checked_column_end = first + checked_width - 1
def test_verify_file_encoding_when_can_not_detect_encoding(self):
    """Declaring ``ascii`` for each listed encoding file must raise ``AppErrorHandler``."""
    not_able_to_detect_encoding_names = \
        self.create_mock_data_file_for_encoding_types(
            "failing_encoding_names.csv")
    comparator_obj = Compare()

    for encoding_val in not_able_to_detect_encoding_names:
        comparator_obj.file_name = os.path.join(self.MOCK_DATA_DIR,
                                                f'{encoding_val}.csv')
        # Always claim ascii, whatever encoding the file name refers to.
        comparator_obj.encoding = 'ascii'
        with pytest.raises(AppErrorHandler):
            input_validator.check_for_file_encoding(comparator_obj)
def test_verify_file_encoding_when_can_detect_encoding(self):
    """Declaring each file's own encoding name must make the check return ``None``."""
    # NOTE(review): the list is read from "failing_encoding_names.csv" even
    # though this is the passing case - confirm that is the intended data set.
    encoding_names = self.create_mock_data_file_for_encoding_types(
        "failing_encoding_names.csv")
    comparator_obj = Compare()

    for encoding_val in encoding_names:
        comparator_obj.file_name = os.path.join(self.MOCK_DATA_DIR,
                                                f'{encoding_val}.csv')
        comparator_obj.encoding = encoding_val

        outcome = input_validator.check_for_file_encoding(comparator_obj)

        assert outcome is None
def test_drop_duplicate_index(self, mock_index):
    """Only ``AAA`` and ``BBB`` must remain once the ``dup1`` rows are dropped."""
    all_rows = {'id': ['dup1', 'dup1', 'dup1', 'AAA', 'BBB']}
    duplicate_rows = {'id': ['dup1', 'dup1', 'dup1']}

    comparable = Compare()
    comparable.index_column_name = mock_index
    comparable.data_frame = pd.DataFrame(data=all_rows, dtype="object")
    comparable.duplicate_index = pd.DataFrame(data=duplicate_rows,
                                              dtype="object")

    index_validator.drop_duplicate_index(comparable)

    remaining = list(comparable.data_frame['id'].values)
    assert ['AAA', 'BBB'] == remaining
def comparable(self, request):
    """Return a ``Compare`` whose attributes are copied from ``request.param``.

    Every key of ``request.param`` becomes an attribute of the new object.
    When a ``file_name`` key is present, the attribute is replaced by that
    name joined onto the ``mock_data`` directory located one level above
    the directory of this file.
    """
    params = request.param
    here = os.path.dirname(os.path.abspath(__file__))
    mock_data_dir = os.path.join(here, os.pardir, 'mock_data')

    comparable_obj = Compare()
    for name, value in params.items():
        setattr(comparable_obj, name, value)

    if "file_name" in params:
        # Overrides the raw name copied by the loop above.
        comparable_obj.file_name = os.path.join(mock_data_dir,
                                                f'{params["file_name"]}')
    return comparable_obj
def set_start_column(comparable_a: Compare, comparable_b: Compare):
    """Set ``start_column`` on both comparables.

    ``comparable_a`` always starts at column 1. ``comparable_b`` starts one
    past the total of all column counts stored on ``comparable_a``.
    """
    comparable_a.start_column = 1  # not supporting multi index

    columns_used_by_a = sum((
        comparable_a.number_of_index_column,
        comparable_a.number_of_unnamed_columns,
        comparable_a.number_of_duplicate_columns,
        comparable_a.number_of_disjunctive_columns,
        comparable_a.number_of_not_checked_columns,
        comparable_a.number_of_mapped_columns,
        comparable_a.number_of_regular_columns,
    ))
    # The first column is for labels and xlsxwriter is 0 based
    comparable_b.start_column = columns_used_by_a + 1
def test_set_index_column_location_when_multiple_index_exist(self):
    """Two header entries named like the index column must raise ``AppErrorHandler``."""
    comparable = Compare()
    comparable.index_column_name = [{"column_name": "id"}]
    # The same column name appears at two different locations.
    comparable.header = [
        {"column_name": "id", "column_location": location}
        for location in (1, 2)
    ]

    with pytest.raises(AppErrorHandler):
        header_validator.set_index_column_location(comparable)
def test_set_start_column(self):
    """With 1 index column and six groups of 2 columns, B must start at column 14."""
    comparable_a = Compare(order=0)
    comparable_b = Compare(order=1)

    comparable_a.number_of_index_column = 1  # not supporting multi index
    two_column_groups = (
        "number_of_unnamed_columns",
        "number_of_duplicate_columns",
        "number_of_disjunctive_columns",
        "number_of_not_checked_columns",
        "number_of_mapped_columns",
        "number_of_regular_columns",
    )
    for counter_name in two_column_groups:
        setattr(comparable_a, counter_name, 2)

    header_validator.set_start_column(comparable_a, comparable_b)

    assert 14 == comparable_b.start_column
    assert 1 == comparable_a.start_column
def test_set_start_end_unnamed_column_2(self):
    """Starting at column 14 with every group populated, unnamed columns span 25-26."""
    comparable = Compare(order=0)
    comparable.start_column = 14
    comparable.number_of_index_column = 1
    two_column_groups = (
        "number_of_mapped_columns",
        "number_of_regular_columns",
        "number_of_not_checked_columns",
        "number_of_disjunctive_columns",
        "number_of_duplicate_columns",
        "number_of_unnamed_columns",
    )
    for counter_name in two_column_groups:
        setattr(comparable, counter_name, 2)

    header_validator.set_start_end_unnamed_column(comparable)

    assert 25 == comparable.unnamed_column_start
    assert 26 == comparable.unnamed_column_end
def set_start_end_duplicate_column(comparable: Compare):
    """Set ``duplicate_column_start`` / ``duplicate_column_end`` on ``comparable``.

    The duplicate range begins after the index, regular, mapped,
    not-checked and disjunctive columns. Nothing is written when there are
    no duplicate columns.
    """
    duplicate_count = comparable.number_of_duplicate_columns
    if duplicate_count <= 0:
        return

    columns_before = sum((
        comparable.number_of_index_column,
        comparable.number_of_regular_columns,
        comparable.number_of_mapped_columns,
        comparable.number_of_not_checked_columns,
        comparable.number_of_disjunctive_columns,
    ))
    first = comparable.start_column + columns_before
    comparable.duplicate_column_start = first
    # Inclusive end: a range of width 1 starts and ends on the same column.
    comparable.duplicate_column_end = first + duplicate_count - 1
def add_local_excel_format(comparable: Compare):
    """Register this comparable's formats on its workbook and store the results.

    Each format definition is looked up in ``excel_schema`` with
    ``comparable.order`` as the key, passed to ``workbook.add_format``, and
    the returned object is kept on ``comparable``.
    """
    order = comparable.order
    register = comparable.workbook.add_format

    comparable.column_general_format = register(
        excel_schema.general_column_format[order])
    comparable.header_format_left_border = register(
        excel_schema.header_format_left_border[order])
    comparable.header_format_left_bottom_border = register(
        excel_schema.header_format_left_bottom_border[order])
    comparable.header_format_bottom_border = register(
        excel_schema.header_format_bottom_border[order])
    comparable.header_format = register(
        excel_schema.header_format[order])