Пример #1
0
    def test_ndarray_to_schema(self):
        """Check the schemas inferred for ``X`` and ``y`` of ``self._irisArr``.

        Each entry must not carry a ``json_schema`` attribute, must validate
        against the schema ``to_schema`` returns for it (with
        ``subsample_array=False``), and that schema must equal the expected
        literal assembled below.
        """
        from lale.datasets.data_schemas import to_schema
        from lale.type_checking import validate_schema

        # Expected dimensions: 150 rows, each a length-4 array of numbers.
        row_count, column_count = 150, 4
        expected_row = {
            "type": "array",
            "minItems": column_count,
            "maxItems": column_count,
            "items": {"type": "number"},
        }
        expected_features = {
            "type": "array",
            "minItems": row_count,
            "maxItems": row_count,
            "items": expected_row,
        }
        expected_labels = {
            "type": "array",
            "minItems": row_count,
            "maxItems": row_count,
            "items": {"type": "integer"},
        }

        iris = self._irisArr
        features, labels = iris["X"], iris["y"]

        assert not hasattr(features, "json_schema")
        features_schema = to_schema(features)
        validate_schema(features, features_schema, subsample_array=False)

        assert not hasattr(labels, "json_schema")
        labels_schema = to_schema(labels)
        validate_schema(labels, labels_schema, subsample_array=False)

        # Show the complete diff if a schema comparison fails.
        self.maxDiff = None
        self.assertEqual(features_schema, expected_features)
        self.assertEqual(labels_schema, expected_labels)
Пример #2
0
 def test_pandas_to_schema(self):
     """Check the schemas inferred for ``X`` and ``y`` of ``self._irisDf``.

     ``X`` must be a ``pd.DataFrame`` and ``y`` a ``pd.Series``, neither
     carrying a ``json_schema`` attribute. Each must validate against the
     schema ``to_schema`` returns for it, and that schema must equal the
     expected literal assembled below.
     """
     from lale.datasets.data_schemas import to_schema
     from lale.type_checking import validate_schema
     import pandas as pd

     # One expected {'description', 'type'} entry per column, in order.
     column_titles = (
         'sepal length (cm)',
         'sepal width (cm)',
         'petal length (cm)',
         'petal width (cm)',
     )
     expected_features = {
         'type': 'array',
         'minItems': 120,
         'maxItems': 120,
         'items': {
             'type': 'array',
             'minItems': 4,
             'maxItems': 4,
             'items': [
                 {'description': title, 'type': 'number'}
                 for title in column_titles
             ],
         },
     }
     expected_target = {
         'type': 'array',
         'minItems': 120,
         'maxItems': 120,
         'items': {'description': 'target', 'type': 'integer'},
     }

     iris = self._irisDf
     features, target = iris['X'], iris['y']

     assert isinstance(features, pd.DataFrame)
     assert not hasattr(features, 'json_schema')
     features_schema = to_schema(features)
     validate_schema(features, features_schema, subsample_array=False)

     assert isinstance(target, pd.Series)
     assert not hasattr(target, 'json_schema')
     target_schema = to_schema(target)
     validate_schema(target, target_schema, subsample_array=False)

     # Show the complete diff if a schema comparison fails.
     self.maxDiff = None
     self.assertEqual(features_schema, expected_features)
     self.assertEqual(target_schema, expected_target)
Пример #3
0
 def test_ndarray_to_schema(self):
     """Compare ``to_schema`` output for ``self._irisArr`` with fixed schemas.

     For both ``X`` and ``y``: assert there is no ``json_schema`` attribute,
     infer a schema with ``to_schema``, validate the data against it with
     ``subsample_array=False``, then require equality with the expected
     schema.
     """
     from lale.datasets.data_schemas import to_schema
     from lale.type_checking import validate_schema

     def fixed_length_array(length, items):
         # Array schema whose minItems and maxItems are both `length`.
         return {
             'type': 'array',
             'minItems': length,
             'maxItems': length,
             'items': items,
         }

     expected_X = fixed_length_array(
         150, fixed_length_array(4, {'type': 'number'}))
     expected_y = fixed_length_array(150, {'type': 'integer'})

     data = self._irisArr
     X, y = data['X'], data['y']

     assert not hasattr(X, 'json_schema')
     X_schema = to_schema(X)
     validate_schema(X, X_schema, subsample_array=False)

     assert not hasattr(y, 'json_schema')
     y_schema = to_schema(y)
     validate_schema(y, y_schema, subsample_array=False)

     # Show the complete diff if a schema comparison fails.
     self.maxDiff = None
     self.assertEqual(X_schema, expected_X)
     self.assertEqual(y_schema, expected_y)
Пример #4
0
    def test_pandas_to_schema(self):
        """Compare ``to_schema`` output for ``self._irisDf`` with fixed schemas.

        ``X`` is asserted to be a ``pd.DataFrame`` and ``y`` a ``pd.Series``,
        both without a ``json_schema`` attribute. Each is validated against
        its inferred schema (``subsample_array=False``) before the schemas
        are compared with the expected ones.
        """
        import pandas as pd

        from lale.datasets.data_schemas import to_schema
        from lale.type_checking import validate_schema

        def number_column(title):
            # Expected entry for one numeric column, labelled by `title`.
            return {"description": title, "type": "number"}

        def sized_array(length, items):
            # Array schema whose minItems and maxItems are both `length`.
            return {
                "type": "array",
                "minItems": length,
                "maxItems": length,
                "items": items,
            }

        expected_row = sized_array(
            4,
            [
                number_column("sepal length (cm)"),
                number_column("sepal width (cm)"),
                number_column("petal length (cm)"),
                number_column("petal width (cm)"),
            ],
        )
        expected_X = sized_array(120, expected_row)
        expected_y = sized_array(
            120, {"description": "target", "type": "integer"}
        )

        frame_data = self._irisDf
        X, y = frame_data["X"], frame_data["y"]

        assert isinstance(X, pd.DataFrame)
        assert not hasattr(X, "json_schema")
        X_schema = to_schema(X)
        validate_schema(X, X_schema, subsample_array=False)

        assert isinstance(y, pd.Series)
        assert not hasattr(y, "json_schema")
        y_schema = to_schema(y)
        validate_schema(y, y_schema, subsample_array=False)

        # Show the complete diff if a schema comparison fails.
        self.maxDiff = None
        self.assertEqual(X_schema, expected_X)
        self.assertEqual(y_schema, expected_y)
Пример #5
0
 def test_datasets_with_own_schemas(self):
     """Validate each listed dataset against the schema inferred for it.

     For every name below, the fixture stored on ``self`` under
     ``_<name>`` supplies ``X`` and ``y``. Schemas for both are obtained
     from ``to_schema`` first, then each is validated with
     ``subsample_array=False``.
     """
     from lale.datasets.data_schemas import to_schema
     from lale.type_checking import validate_schema

     dataset_names = (
         'irisArr',
         'irisDf',
         'digits',
         'housing',
         'creditG',
         'movies',
         'drugRev',
     )
     for dataset_name in dataset_names:
         bundle = getattr(self, f'_{dataset_name}')
         features, target = bundle['X'], bundle['y']
         # Infer both schemas before validating either one.
         features_schema = to_schema(features)
         target_schema = to_schema(target)
         validate_schema(features, features_schema, subsample_array=False)
         validate_schema(target, target_schema, subsample_array=False)
Пример #6
0
    def test_datasets_with_own_schemas(self):
        """Run ``validate_schema`` on every dataset fixture listed below.

        Each fixture is read from ``self`` by attribute name and provides
        ``X`` and ``y``. The schema used for validation is whatever
        ``to_schema`` returns for the same object. No expected schema is
        compared here.
        """
        from lale.datasets.data_schemas import to_schema
        from lale.type_checking import validate_schema

        fixture_keys = (
            "irisArr",
            "irisDf",
            "digits",
            "housing",
            "creditG",
            "movies",
            "drugRev",
        )
        for key in fixture_keys:
            fixture = getattr(self, f"_{key}")
            X, y = fixture["X"], fixture["y"]
            # Infer both schemas before validating either one.
            X_schema, y_schema = to_schema(X), to_schema(y)
            validate_schema(X, X_schema, subsample_array=False)
            validate_schema(y, y_schema, subsample_array=False)
Пример #7
0
    def test_arff_to_schema(self):
        """Check the schemas ``to_schema`` returns for ``self._creditG``.

        Both ``X`` and ``y`` are asserted to already have a ``json_schema``
        attribute. Each is validated against the schema returned by
        ``to_schema`` (``subsample_array=False``), and the schemas are then
        compared with the expected 20-column description assembled below.
        """
        from lale.datasets.data_schemas import to_schema
        from lale.type_checking import validate_schema

        # (column name, allowed values); None marks a plain numeric column.
        column_table = [
            ("checking_status", ["<0", "0<=X<200", ">=200", "no checking"]),
            ("duration", None),
            (
                "credit_history",
                [
                    "no credits/all paid",
                    "all paid",
                    "existing paid",
                    "delayed previously",
                    "critical/other existing credit",
                ],
            ),
            (
                "purpose",
                [
                    "new car",
                    "used car",
                    "furniture/equipment",
                    "radio/tv",
                    "domestic appliance",
                    "repairs",
                    "education",
                    "vacation",
                    "retraining",
                    "business",
                    "other",
                ],
            ),
            ("credit_amount", None),
            (
                "savings_status",
                [
                    "<100",
                    "100<=X<500",
                    "500<=X<1000",
                    ">=1000",
                    "no known savings",
                ],
            ),
            ("employment", ["unemployed", "<1", "1<=X<4", "4<=X<7", ">=7"]),
            ("installment_commitment", None),
            (
                "personal_status",
                [
                    "male div/sep",
                    "female div/dep/mar",
                    "male single",
                    "male mar/wid",
                    "female single",
                ],
            ),
            ("other_parties", ["none", "co applicant", "guarantor"]),
            ("residence_since", None),
            (
                "property_magnitude",
                [
                    "real estate",
                    "life insurance",
                    "car",
                    "no known property",
                ],
            ),
            ("age", None),
            ("other_payment_plans", ["bank", "stores", "none"]),
            ("housing", ["rent", "own", "for free"]),
            ("existing_credits", None),
            (
                "job",
                [
                    "unemp/unskilled non res",
                    "unskilled resident",
                    "skilled",
                    "high qualif/self emp/mgmt",
                ],
            ),
            ("num_dependents", None),
            ("own_telephone", ["none", "yes"]),
            ("foreign_worker", ["yes", "no"]),
        ]
        expected_columns = [
            {"description": column, "type": "number"}
            if categories is None
            else {"description": column, "enum": categories}
            for column, categories in column_table
        ]
        expected_X = {
            "type": "array",
            "minItems": 670,
            "maxItems": 670,
            "items": {
                "type": "array",
                "minItems": 20,
                "maxItems": 20,
                "items": expected_columns,
            },
        }
        expected_y = {
            "type": "array",
            "minItems": 670,
            "maxItems": 670,
            "items": {"description": "class", "enum": ["good", "bad"]},
        }

        credit = self._creditG
        X, y = credit["X"], credit["y"]

        assert hasattr(X, "json_schema")
        X_schema = to_schema(X)
        validate_schema(X, X_schema, subsample_array=False)

        assert hasattr(y, "json_schema")
        y_schema = to_schema(y)
        validate_schema(y, y_schema, subsample_array=False)

        # Show the complete diff if a schema comparison fails.
        self.maxDiff = None
        self.assertEqual(X_schema, expected_X)
        self.assertEqual(y_schema, expected_y)
Пример #8
0
 def test_arff_to_schema(self):
     """Check the schemas ``to_schema`` returns for ``self._creditG``.

     ``X`` and ``y`` must each already have a ``json_schema`` attribute
     and must validate against the schema ``to_schema`` returns
     (``subsample_array=False``). The returned schemas are then compared
     with the expected ones assembled below.
     """
     from lale.datasets.data_schemas import to_schema
     from lale.type_checking import validate_schema

     def numeric(title):
         # Expected entry for a numeric column.
         return {'description': title, 'type': 'number'}

     def categorical(title, *values):
         # Expected entry for a column restricted to `values`, in order.
         return {'description': title, 'enum': list(values)}

     expected_columns = [
         categorical('checking_status',
                     '<0', '0<=X<200', '>=200', 'no checking'),
         numeric('duration'),
         categorical('credit_history',
                     'no credits/all paid', 'all paid', 'existing paid',
                     'delayed previously',
                     'critical/other existing credit'),
         categorical('purpose',
                     'new car', 'used car', 'furniture/equipment',
                     'radio/tv', 'domestic appliance', 'repairs',
                     'education', 'vacation', 'retraining', 'business',
                     'other'),
         numeric('credit_amount'),
         categorical('savings_status',
                     '<100', '100<=X<500', '500<=X<1000', '>=1000',
                     'no known savings'),
         categorical('employment',
                     'unemployed', '<1', '1<=X<4', '4<=X<7', '>=7'),
         numeric('installment_commitment'),
         categorical('personal_status',
                     'male div/sep', 'female div/dep/mar', 'male single',
                     'male mar/wid', 'female single'),
         categorical('other_parties',
                     'none', 'co applicant', 'guarantor'),
         numeric('residence_since'),
         categorical('property_magnitude',
                     'real estate', 'life insurance', 'car',
                     'no known property'),
         numeric('age'),
         categorical('other_payment_plans', 'bank', 'stores', 'none'),
         categorical('housing', 'rent', 'own', 'for free'),
         numeric('existing_credits'),
         categorical('job',
                     'unemp/unskilled non res', 'unskilled resident',
                     'skilled', 'high qualif/self emp/mgmt'),
         numeric('num_dependents'),
         categorical('own_telephone', 'none', 'yes'),
         categorical('foreign_worker', 'yes', 'no'),
     ]
     expected_X = {
         'type': 'array',
         'minItems': 670,
         'maxItems': 670,
         'items': {
             'type': 'array',
             'minItems': 20,
             'maxItems': 20,
             'items': expected_columns,
         },
     }
     expected_y = {
         'type': 'array',
         'minItems': 670,
         'maxItems': 670,
         'items': {'description': 'class', 'enum': [0, 1]},
     }

     credit = self._creditG
     X, y = credit['X'], credit['y']

     assert hasattr(X, 'json_schema')
     X_schema = to_schema(X)
     validate_schema(X, X_schema, subsample_array=False)

     assert hasattr(y, 'json_schema')
     y_schema = to_schema(y)
     validate_schema(y, y_schema, subsample_array=False)

     # Show the complete diff if a schema comparison fails.
     self.maxDiff = None
     self.assertEqual(X_schema, expected_X)
     self.assertEqual(y_schema, expected_y)