Пример #1
0
 def test_unique_values(self):
     """Check the unique values left after cleaning the species series.

     The distinct values of the cleaned series are compared, ignoring order,
     with the distinct values of the dataframe's 'Species' column.
     """
     cleaned = SeriesValidator(data=self.species,
                               rules=self.species_rules).clean()
     observed = cleaned.unique()
     expected = self.dataframe['Species'].unique()
     self.assertItemsEqual(observed, expected)
Пример #2
0
 def test_unique_values(self):
     """Check the unique values left after cleaning the species series.

     The distinct values of the cleaned series are compared, ignoring order,
     with the distinct values of the dataframe's 'Species' column.
     """
     cleaned = SeriesValidator(data=self.species,
                               rules=self.species_rules).clean()
     observed = cleaned.unique()
     expected = self.dataframe['Species'].unique()
     self.assertItemsEqual(observed, expected)
Пример #3
0
 def test_postprocessor(self):
     """Test if postprocessors work for series data.

     ``_dummy_postproc`` is installed as the only postprocessor, and the
     cleaned series is expected to no longer contain "setosa".
     """
     self.species_rules['postprocessors'] = [_dummy_postproc]
     try:
         # Build the validator inside the ``try`` so that the temporary rule
         # is removed from the rules dict even if construction itself raises.
         validator = SeriesValidator(data=self.species,
                                     rules=self.species_rules)
         cleaned = validator.clean()
         self.assertNotIn("setosa", cleaned.unique())
     finally:
         del self.species_rules['postprocessors']
Пример #4
0
 def test_postprocessor(self):
     """Test if postprocessors work for series data.

     ``_dummy_postproc`` is installed as the only postprocessor, and the
     cleaned series is expected to no longer contain "setosa".
     """
     self.species_rules['postprocessors'] = [_dummy_postproc]
     try:
         # Build the validator inside the ``try`` so that the temporary rule
         # is removed from the rules dict even if construction itself raises.
         validator = SeriesValidator(data=self.species,
                                     rules=self.species_rules)
         cleaned = validator.clean()
         self.assertNotIn("setosa", cleaned.unique())
     finally:
         del self.species_rules['postprocessors']
Пример #5
0
 def test_drop_duplicates(self):
     """Verify the 'drop_duplicates' rule of the SeriesValidator.

     With the rule enabled, the cleaned series must have 3 rows whose values
     match, ignoring order, the unique values of the species series.
     """
     self.species_rules['drop_duplicates'] = True
     try:
         expected = self.species.unique().tolist()
         cleaned = SeriesValidator(data=self.species,
                                   rules=self.species_rules).clean()
         n_rows = cleaned.shape[0]
         self.assertEqual(n_rows, 3)
         self.assertItemsEqual(cleaned.tolist(), expected)
     finally:
         # Switch the flag back off once the test is over.
         self.species_rules['drop_duplicates'] = False
Пример #6
0
    def test_bad_unique_values(self):
        """Check that bogus values are removed by the validator.

        Fifty random 'lily' / 'petunia' entries are shuffled into the species
        values; the unique values of the cleaned series must match, ignoring
        order, those of the dataframe's 'Species' column.
        """
        # Pollute the species values with entries that do not belong there.
        bogus = np.random.choice(['lily', 'petunia'], size=(50,))
        mixed = np.hstack((self.species.values, bogus))
        np.random.shuffle(mixed)
        polluted = pd.Series(mixed)

        cleaned = SeriesValidator(data=polluted,
                                  rules=self.species_rules).clean()
        self.assertItemsEqual(cleaned.unique(),
                              self.dataframe['Species'].unique())
Пример #7
0
 def test_drop_duplicates(self):
     """Verify the 'drop_duplicates' rule of the SeriesValidator.

     With the rule enabled, the cleaned series must have 3 rows whose values
     match, ignoring order, the unique values of the species series.
     """
     self.species_rules['drop_duplicates'] = True
     try:
         expected = self.species.unique().tolist()
         cleaned = SeriesValidator(data=self.species,
                                   rules=self.species_rules).clean()
         n_rows = cleaned.shape[0]
         self.assertEqual(n_rows, 3)
         self.assertItemsEqual(cleaned.tolist(), expected)
     finally:
         # Switch the flag back off once the test is over.
         self.species_rules['drop_duplicates'] = False
Пример #8
0
    def test_bad_unique_values(self):
        """Check that bogus values are removed by the validator.

        Fifty random 'lily' / 'petunia' entries are shuffled into the species
        values; the unique values of the cleaned series must match, ignoring
        order, those of the dataframe's 'Species' column.
        """
        # Pollute the species values with entries that do not belong there.
        bogus = np.random.choice(['lily', 'petunia'], size=(50,))
        mixed = np.hstack((self.species.values, bogus))
        np.random.shuffle(mixed)
        polluted = pd.Series(mixed)

        cleaned = SeriesValidator(data=polluted,
                                  rules=self.species_rules).clean()
        self.assertItemsEqual(cleaned.unique(),
                              self.dataframe['Species'].unique())
Пример #9
0
 def test_min_max_rules(self):
     """Check that 'min' and 'max' rules bound the cleaned values.

     The sepal-length rules temporarily get a minimum of 5.0 and a maximum
     of 7.0; the cleaned series must stay within that closed interval.
     """
     lower, upper = 5.0, 7.0
     self.sepal_length_rules['min'] = lower
     self.sepal_length_rules['max'] = upper
     try:
         cleaned = SeriesValidator(data=self.sepal_length,
                                   rules=self.sepal_length_rules).clean()
         self.assertLessEqual(cleaned.max(), upper)
         self.assertGreaterEqual(cleaned.min(), lower)
     finally:
         # Drop the temporary bounds again: 'max' first, then 'min'.
         for key in ('max', 'min'):
             del self.sepal_length_rules[key]
Пример #10
0
 def test_min_max_rules(self):
     """Check that 'min' and 'max' rules bound the cleaned values.

     The sepal-length rules temporarily get a minimum of 5.0 and a maximum
     of 7.0; the cleaned series must stay within that closed interval.
     """
     lower, upper = 5.0, 7.0
     self.sepal_length_rules['min'] = lower
     self.sepal_length_rules['max'] = upper
     try:
         cleaned = SeriesValidator(data=self.sepal_length,
                                   rules=self.sepal_length_rules).clean()
         self.assertLessEqual(cleaned.max(), upper)
         self.assertGreaterEqual(cleaned.min(), lower)
     finally:
         # Drop the temporary bounds again: 'max' first, then 'min'.
         for key in ('max', 'min'):
             del self.sepal_length_rules[key]
Пример #11
0
 def test_converter(self):
     """Check that converters listed in the rules are applied.

     ``_dummy_converter`` is installed as the only converter. The cleaned
     series is cast to bool and used as a mask over the species series; the
     selected entries must all be 'setosa'.
     """
     self.species_rules['converters'] = [_dummy_converter]
     try:
         converted = SeriesValidator(data=self.species,
                                     rules=self.species_rules).clean()
         mask = converted.astype(bool)
         selected = self.species[mask]
         self.assertEqual(selected.nunique(), 1)
         self.assertItemsEqual(selected.unique(), ['setosa'])
     finally:
         # Remove the temporary converter from the rules.
         del self.species_rules['converters']
Пример #12
0
 def test_drop_na(self):
     """Verify the 'drop_na' rule of the SeriesValidator.

     A 100-element series is sampled at random from the unique species plus
     ``None``; after cleaning, its unique values must match those of the
     species series.
     """
     self.species_rules['drop_na'] = True
     try:
         pool = self.species.unique().tolist() + [None]
         sample = pd.Series(np.random.choice(pool, size=(100, )))
         cleaned = SeriesValidator(data=sample,
                                   rules=self.species_rules).clean()
         self.assertEqual(cleaned.nunique(), self.species.nunique())
         self.assertItemsEqual(cleaned.unique().tolist(),
                               self.species.unique().tolist())
     finally:
         # Switch the flag back off once the test is over.
         self.species_rules['drop_na'] = False
Пример #13
0
 def test_drop_na(self):
     """Verify the 'drop_na' rule of the SeriesValidator.

     A 100-element series is sampled at random from the unique species plus
     ``None``; after cleaning, its unique values must match those of the
     species series.
     """
     self.species_rules['drop_na'] = True
     try:
         pool = self.species.unique().tolist() + [None]
         sample = pd.Series(np.random.choice(pool, size=(100,)))
         cleaned = SeriesValidator(data=sample,
                                   rules=self.species_rules).clean()
         self.assertEqual(cleaned.nunique(), self.species.nunique())
         self.assertItemsEqual(cleaned.unique().tolist(),
                               self.species.unique().tolist())
     finally:
         # Switch the flag back off once the test is over.
         self.species_rules['drop_na'] = False
Пример #14
0
    def test_regex_filter(self):
        """Check regex-based filtering in the SeriesValidator.

        With a lowercase-letters-only pattern in the rules, the cleaned
        species series must equal the dataframe's 'Species' column. After
        every "e" is replaced with "1", only 50 'virginica' entries are
        expected to remain.
        """
        self.species_rules['regex'] = r'\b[a-z]+\b'
        try:
            untouched = SeriesValidator(data=self.species,
                                        rules=self.species_rules).clean()
            self.assertSeriesEqual(untouched, self.dataframe['Species'])

            # Put a digit into every name that contains an "e".
            self.species = self.dataframe['Species'].copy()
            self.species = self.species.apply(
                lambda name: name.replace("e", "1"))
            filtered = SeriesValidator(data=self.species,
                                       rules=self.species_rules).clean()
            self.assertItemsEqual(filtered.shape, (50,))
            self.assertItemsEqual(filtered.unique().tolist(), ['virginica'])
        finally:
            # Remove the temporary pattern from the rules.
            del self.species_rules['regex']
Пример #15
0
    def test_regex_filter(self):
        """Check regex-based filtering in the SeriesValidator.

        With a lowercase-letters-only pattern in the rules, the cleaned
        species series must equal the dataframe's 'Species' column. After
        every "e" is replaced with "1", only 50 'virginica' entries are
        expected to remain.
        """
        self.species_rules['regex'] = r'\b[a-z]+\b'
        try:
            untouched = SeriesValidator(data=self.species,
                                        rules=self.species_rules).clean()
            self.assertSeriesEqual(untouched, self.dataframe['Species'])

            # Put a digit into every name that contains an "e".
            self.species = self.dataframe['Species'].copy()
            self.species = self.species.apply(
                lambda name: name.replace("e", "1"))
            filtered = SeriesValidator(data=self.species,
                                       rules=self.species_rules).clean()
            self.assertItemsEqual(filtered.shape, (50,))
            self.assertItemsEqual(filtered.unique().tolist(), ['virginica'])
        finally:
            # Remove the temporary pattern from the rules.
            del self.species_rules['regex']
Пример #16
0
 def test_numerical_series(self):
     """Check the SeriesValidator on a numerical series.

     The cleaned sepal-length series is compared with the dataframe's
     'Sepal Length' column.
     """
     cleaned = SeriesValidator(data=self.sepal_length,
                               rules=self.sepal_length_rules).clean()
     self.assertSeriesEqual(cleaned, self.dataframe['Sepal Length'])
Пример #17
0
 def test_numerical_series(self):
     """Check the SeriesValidator on a numerical series.

     The cleaned sepal-length series is compared with the dataframe's
     'Sepal Length' column.
     """
     cleaned = SeriesValidator(data=self.sepal_length,
                               rules=self.sepal_length_rules).clean()
     self.assertSeriesEqual(cleaned, self.dataframe['Sepal Length'])