def test_validate_instance_invalid_environment(self):
        """Checks that an environment missing from the schema is rejected.

        The schema declares only the TRAINING and SERVING default
        environments, so calling validate_instance with
        environment='INVALID' is expected to raise a ValueError whose
        message matches 'Environment.*not found in the schema.*'.
        """
        instance = {'feature': np.array(['A'])}
        schema = text_format.Parse(
            """
        default_environment: "TRAINING"
        default_environment: "SERVING"
        feature {
          name: "label"
          not_in_environment: "SERVING"
          value_count { min: 1 max: 1 }
          presence { min_count: 1 }
          type: BYTES
        }
        feature {
          name: "feature"
          value_count { min: 1 max: 1 }
          presence { min_count: 1 }
          type: BYTES
        }
        """, schema_pb2.Schema())
        options = stats_options.StatsOptions(schema=schema)

        # assertRaisesRegexp is a deprecated alias that was removed in
        # Python 3.12; assertRaisesRegex is the supported spelling.
        with self.assertRaisesRegex(ValueError,
                                    'Environment.*not found in the schema.*'):
            # Only the raised exception matters, so the result is discarded.
            validation_api.validate_instance(instance,
                                             options,
                                             environment='INVALID')
 def test_validate_instance_stats_options_without_schema(self):
     """Checks that StatsOptions created without a schema is rejected.

     validate_instance is expected to raise a ValueError whose message
     matches 'options must include a schema.'.
     """
     instance = {'feature': np.array(['A'])}
     # This instance of StatsOptions has no schema.
     options = stats_options.StatsOptions()
     # assertRaisesRegexp is a deprecated alias that was removed in
     # Python 3.12; assertRaisesRegex is the supported spelling.
     with self.assertRaisesRegex(ValueError,
                                 'options must include a schema.'):
         validation_api.validate_instance(instance, options)
    def test_validate_instance_environment(self):
        """Validates one example against the same schema in two environments.

        The 'label' feature is marked not_in_environment "SERVING" and the
        example only carries 'feature'. The test expects a
        SCHEMA_MISSING_COLUMN anomaly for 'label' when validating under
        TRAINING, and an empty anomalies mapping under SERVING.
        """
        schema_text = """
        default_environment: "TRAINING"
        default_environment: "SERVING"
        feature {
          name: "label"
          not_in_environment: "SERVING"
          value_count { min: 1 max: 1 }
          presence { min_count: 1 }
          type: BYTES
        }
        feature {
          name: "feature"
          value_count { min: 1 max: 1 }
          presence { min_count: 1 }
          type: BYTES
        }
        """
        missing_label_text = """
            description: "Column is completely missing"
            severity: ERROR
            short_description: "Column dropped"
            reason {
              type: SCHEMA_MISSING_COLUMN
              short_description: "Column dropped"
              description: "Column is completely missing"
            }
            """
        example = {'feature': np.array(['A'])}
        stats_opts = stats_options.StatsOptions(
            schema=text_format.Parse(schema_text, schema_pb2.Schema()))

        # TRAINING: 'label' is required here but absent from the example.
        expected_training = {
            'label': text_format.Parse(missing_label_text,
                                       anomalies_pb2.AnomalyInfo()),
        }
        training_result = validation_api.validate_instance(
            example, stats_opts, environment='TRAINING')
        self._assert_equal_anomalies(training_result, expected_training)

        # SERVING: 'label' is excluded from this environment, so no
        # anomalies are expected.
        serving_result = validation_api.validate_instance(
            example, stats_opts, environment='SERVING')
        self._assert_equal_anomalies(serving_result, {})
 def test_validate_instance_global_only_anomaly_type(self):
     """Checks that dataset-level-only anomaly types are not reported.

     The schema sets presence.min_count to 5 for 'annotated_enum'. Validating
     any single example against it would generate an anomaly of type
     FEATURE_TYPE_LOW_NUMBER_PRESENT, which is not meaningful in per-example
     validation. The test therefore expects validate_instance to return only
     the ENUM_TYPE_UNEXPECTED_STRING_VALUES anomaly for the value 'D'.
     """
     enum_schema_text = """
     string_domain {
       name: "MyAloneEnum"
       value: "A"
       value: "B"
       value: "C"
     }
     feature {
       name: "annotated_enum"
       value_count {
         min:1
         max:1
       }
       presence {
         min_count: 5
       }
       type: BYTES
       domain: "MyAloneEnum"
     }
     feature {
       name: "ignore_this"
       lifecycle_stage: DEPRECATED
       value_count {
         min:1
       }
       presence {
         min_count: 1
       }
       type: BYTES
     }
     """
     unexpected_value_text = """
   description: "Examples contain values missing from the schema: D "
     "(~100%). "
   severity: ERROR
   short_description: "Unexpected string values"
   reason {
     type: ENUM_TYPE_UNEXPECTED_STRING_VALUES
     short_description: "Unexpected string values"
     description: "Examples contain values missing from the schema: D "
       "(~100%). "
   }
         """
     example = {'annotated_enum': np.array(['D'])}
     parsed_schema = text_format.Parse(enum_schema_text, schema_pb2.Schema())
     expected = {
         'annotated_enum': text_format.Parse(unexpected_value_text,
                                             anomalies_pb2.AnomalyInfo()),
     }
     stats_opts = stats_options.StatsOptions(schema=parsed_schema)
     self._assert_equal_anomalies(
         validation_api.validate_instance(example, stats_opts), expected)
 def test_validate_instance(self):
     """Validates a single example holding a value outside its string domain.

     'annotated_enum' is bound to the domain MyAloneEnum (A, B, C) while the
     example holds 'D'; the test expects one
     ENUM_TYPE_UNEXPECTED_STRING_VALUES anomaly keyed by 'annotated_enum'.
     """
     enum_schema_text = """
     string_domain {
       name: "MyAloneEnum"
       value: "A"
       value: "B"
       value: "C"
     }
     feature {
       name: "annotated_enum"
       value_count {
         min:1
         max:1
       }
       presence {
         min_count: 1
       }
       type: BYTES
       domain: "MyAloneEnum"
     }
     feature {
       name: "ignore_this"
       lifecycle_stage: DEPRECATED
       value_count {
         min:1
       }
       presence {
         min_count: 1
       }
       type: BYTES
     }
     """
     unexpected_value_text = """
   description: "Examples contain values missing from the schema: D "
     "(~100%). "
   severity: ERROR
   short_description: "Unexpected string values"
   reason {
     type: ENUM_TYPE_UNEXPECTED_STRING_VALUES
     short_description: "Unexpected string values"
     description: "Examples contain values missing from the schema: D "
       "(~100%). "
   }
         """
     example = {'annotated_enum': np.array(['D'])}
     parsed_schema = text_format.Parse(enum_schema_text, schema_pb2.Schema())
     expected = {
         'annotated_enum': text_format.Parse(unexpected_value_text,
                                             anomalies_pb2.AnomalyInfo()),
     }
     stats_opts = stats_options.StatsOptions(schema=parsed_schema)
     found = validation_api.validate_instance(example, stats_opts)
     self._assert_equal_anomalies(found, expected)
 def test_validate_instance_invalid_options(self):
     """Checks that a non-StatsOptions `options` argument is rejected.

     A plain dict is passed as options; validate_instance is expected to
     raise a ValueError matching 'options must be a StatsOptions object.'.
     """
     instance = {'feature': np.array(['A'])}
     # assertRaisesRegexp is a deprecated alias that was removed in
     # Python 3.12; assertRaisesRegex is the supported spelling.
     with self.assertRaisesRegex(ValueError,
                                 'options must be a StatsOptions object.'):
         validation_api.validate_instance(instance, {})
# Example #7
# 0
 def test_validate_instance_stats_options_without_schema(self):
   """Checks that StatsOptions created without a schema is rejected.

   The instance is a one-column pyarrow Table; validate_instance is expected
   to raise a ValueError matching 'options must include a schema.'.
   """
   instance = pa.Table.from_arrays([pa.array([['A']])], ['feature'])
   # This instance of StatsOptions has no schema.
   options = stats_options.StatsOptions()
   # assertRaisesRegexp is a deprecated alias that was removed in
   # Python 3.12; assertRaisesRegex is the supported spelling.
   with self.assertRaisesRegex(ValueError, 'options must include a schema.'):
     validation_api.validate_instance(instance, options)
# Example #8
# 0
 def test_validate_instance_invalid_options(self):
   """Checks that a non-StatsOptions `options` argument is rejected.

   The instance is a one-column pyarrow Table and a plain dict is passed as
   options; validate_instance is expected to raise a ValueError matching
   'options must be a StatsOptions object.'.
   """
   instance = pa.Table.from_arrays([pa.array([['A']])], ['feature'])
   # assertRaisesRegexp is a deprecated alias that was removed in
   # Python 3.12; assertRaisesRegex is the supported spelling.
   with self.assertRaisesRegex(ValueError,
                               'options must be a StatsOptions object.'):
     validation_api.validate_instance(instance, {})