示例#1
0
 def test_integer_success(self):
     """Integer datatype: numeric strings convert cleanly.

     Feeds the strings '1', '2', '3' through a DatatypeProcessor configured
     with Datatype.INTEGER and expects the values 1, 2, 3 back with an empty
     error list on the report.
     """
     raw_column = pd.Series(['1', '2', '3'], name='test')
     processor = DatatypeProcessor(Datatype.INTEGER)
     report = FileFormatReport()

     converted = processor.process(raw_column, report)

     expected_values = [1, 2, 3]
     assert list(converted) == expected_values
     assert report.errors == []
示例#2
0
 def test_success(self):
     """Uniqueness check: all-distinct values pass untouched.

     The processor is expected to hand back the very same Series object and
     leave the report's error list empty.
     """
     column = pd.Series(['a', 'b', 'c'], name='test')
     processor = UniquenessProcessor()
     report = FileFormatReport()

     result = processor.process(column, report)

     # Identity, not equality: the input object itself must be returned.
     assert column is result
     assert report.errors == []
示例#3
0
 def test_missing(self):
     """Options check: empty string and None entries raise no errors.

     The column mixes allowed options with '' and None; the test expects the
     same Series object back and no errors on the report.
     """
     allowed = ['Yes', 'No']
     column = pd.Series(['Yes', 'No', '', None], name='test')
     processor = OptionsProcessor(options=allowed)
     report = FileFormatReport()

     result = processor.process(column, report)

     assert column is result
     assert report.errors == []
示例#4
0
    def test_success(self):
        """Regex check: values fitting the FG<digits> pattern pass untouched.

        Expects the same Series object to be returned and the report's error
        list to stay empty.
        """
        pattern = r'FG\d+'
        column = pd.Series(['FG10001', 'FG2945', 'FG1249'], name='test')
        processor = RegexProcessor(regex=pattern)
        report = FileFormatReport()

        result = processor.process(column, report)

        assert column is result
        assert report.errors == []
示例#5
0
 def test_float_success(self):
     """Float datatype: numeric strings convert to a float32 Series.

     The output is compared via Series.equals against a float32 Series named
     'test'; the report must contain no errors.
     """
     raw_column = pd.Series(['1.1', '2', '3'], name='test')
     processor = DatatypeProcessor(Datatype.FLOAT)
     report = FileFormatReport()

     converted = processor.process(raw_column, report)

     expected_values = [1.1, 2.0, 3.0]
     expected = pd.Series(expected_values, name="test", dtype="float32")
     assert converted.equals(expected)
     assert report.errors == []
示例#6
0
 def test_date_success(self):
     """Date datatype: DD/MM/YYYY strings come back as YYYY-MM-DD strings.

     The expected Series holds the reformatted dates with dtype "str" and the
     name 'test'; the report must contain no errors.
     """
     raw_dates = ['10/05/2020', '11/05/2020', '12/05/2020']
     raw_column = pd.Series(raw_dates, name='test')
     processor = DatatypeProcessor(Datatype.DATE, dateformat="DD/MM/YYYY")
     report = FileFormatReport()

     converted = processor.process(raw_column, report)

     iso_dates = ["2020-05-10", "2020-05-11", "2020-05-12"]
     expected = pd.Series(iso_dates, name="test", dtype="str")
     assert converted.equals(expected)
     assert report.errors == []
示例#7
0
 def test_failure(self):
     """Options check: a value outside the allowed options is reported.

     'MayBe' at row index 2 is not among the options, so exactly one error
     with code 'invalid_value' is expected; the Series itself is returned
     unchanged (same object).
     """
     allowed = ['Yes', 'No']
     column = pd.Series(['Yes', 'No', 'MayBe'], name='test')
     processor = OptionsProcessor(options=allowed)
     report = FileFormatReport()

     result = processor.process(column, report)

     assert column is result
     assert len(report.errors) == 1

     error = report.errors[0]
     assert error.row_index == 2
     assert error.error_code == 'invalid_value'
     assert error.error_message == "The value is not one of the allowed options: 'MayBe'"
示例#8
0
 def test_failure(self):
     """Regex check: a value that does not fit the pattern is reported.

     'X1249' at row index 2 does not fit FG<digits>, so exactly one error
     with code 'invalid_pattern' is expected; the Series itself is returned
     unchanged (same object).
     """
     pattern = r'FG\d+'
     column = pd.Series(['FG10001', 'FG2945', 'X1249'], name='test')
     processor = RegexProcessor(regex=pattern)
     report = FileFormatReport()

     result = processor.process(column, report)

     assert column is result
     assert len(report.errors) == 1

     error = report.errors[0]
     assert error.row_index == 2
     assert error.error_code == 'invalid_pattern'
     assert error.error_message == r"The value is not matching the pattern FG\d+: 'X1249'"
示例#9
0
 def test_failure(self):
     """Uniqueness check: a repeated value is reported once.

     The second 'a' sits at row index 2, so exactly one error with code
     'duplicate_value' is expected there; the Series itself is returned
     unchanged (same object).
     """
     column = pd.Series(['a', 'b', 'a'], name='test')
     processor = UniquenessProcessor()
     report = FileFormatReport()

     result = processor.process(column, report)

     assert column is result
     assert len(report.errors) == 1

     error = report.errors[0]
     assert error.row_index == 2
     assert error.error_code == 'duplicate_value'
     assert error.error_message == "Found duplicate value: 'a'"
示例#10
0
 def test_integer_failure(self):
     """Integer datatype: a non-numeric entry becomes pd.NA and is reported.

     'x' at row index 2 cannot be converted, so the output is expected to
     hold pd.NA in that slot and the report to carry exactly one
     'invalid-value' error.
     """
     raw_column = pd.Series(['1', '2', 'x', '4'], name='test')
     processor = DatatypeProcessor(Datatype.INTEGER)
     report = FileFormatReport()

     converted = processor.process(raw_column, report)

     # NOTE(review): pd.NA == pd.NA does not evaluate to a plain bool; this
     # list comparison presumably passes only because list equality checks
     # element identity first and the output holds the pd.NA singleton --
     # confirm before changing the processor's missing-value marker.
     expected_values = [1, 2, pd.NA, 4]
     assert list(converted) == expected_values
     assert len(report.errors) == 1

     error = report.errors[0]
     assert error.row_index == 2
     assert error.error_code == 'invalid-value'
     assert error.error_message == "Invalid integer: 'x'"
示例#11
0
 def test_date_failure(self):
     """Date datatype: an entry not in DD/MM/YYYY form is nulled and reported.

     '12-05-2020' at row index 2 uses dashes instead of slashes; the expected
     output has None in that slot and the report carries exactly one
     'invalid-value' error.
     """
     raw_dates = ['10/05/2020', '11/05/2020', '12-05-2020']
     raw_column = pd.Series(raw_dates, name='test')
     processor = DatatypeProcessor(Datatype.DATE, dateformat="DD/MM/YYYY")
     report = FileFormatReport()

     converted = processor.process(raw_column, report)

     expected_values = ['2020-05-10', '2020-05-11', None]
     expected = pd.Series(expected_values, name="test", dtype="str")
     assert converted.equals(expected)
     assert len(report.errors) == 1

     error = report.errors[0]
     assert error.row_index == 2
     assert error.error_code == 'invalid-value'
     assert error.error_message == "Invalid date: '12-05-2020'"
示例#12
0
 def test_float_failure(self):
     """Float datatype: a non-numeric entry becomes NaN and is reported.

     'x' at row index 2 cannot be converted; the expected float32 output has
     NaN in that slot and the report carries exactly one 'invalid-value'
     error.
     """
     raw_column = pd.Series(['1.5', '2', 'x', '4'], name='test')
     processor = DatatypeProcessor(Datatype.FLOAT)
     report = FileFormatReport()

     converted = processor.process(raw_column, report)

     expected_values = [1.5, 2.0, np.nan, 4.0]
     expected = pd.Series(expected_values, name="test", dtype="float32")
     assert converted.equals(expected)
     assert len(report.errors) == 1

     error = report.errors[0]
     assert error.row_index == 2
     assert error.error_code == 'invalid-value'
     assert error.error_message == "Invalid number: 'x'"