def test_to_and_from_json_convert_integers(self):
        """Check that integer-conversion state survives a to_json/from_json round trip.

        One preprocessor is fitted on a small frame; a second, unfitted one is
        loaded from the first one's JSON and used for the transform. All four
        columns must still be present, and the string columns must come back
        as the expected integer codes.
        """
        # training data
        d = {
            "col1": [1, 2, 3],
            "col2": ["a", "a", "c"],
            "col3": [1, 1, 3],
            "col4": ["a", "b", "c"],
        }
        df = pd.DataFrame(data=d)
        cat1 = PreprocessingCategorical(
            df.columns, PreprocessingCategorical.CONVERT_INTEGER)
        cat1.fit(df)

        # cat2 is never fitted: everything it knows comes from cat1's JSON.
        cat2 = PreprocessingCategorical(
            df.columns, PreprocessingCategorical.CONVERT_INTEGER)
        cat2.from_json(cat1.to_json())
        transformed = cat2.transform(df)

        # assertIn reports the missing column name on failure.
        for col in d:
            self.assertIn(col, transformed.columns)

        expected_codes = {
            "col2": [0, 0, 1],
            "col4": [0, 1, 2],
        }
        for col, codes in expected_codes.items():
            for row, code in enumerate(codes):
                self.assertEqual(transformed[col][row], code)
Exemplo n.º 2
0
    def from_json(self, data_json):
        """Load preprocessing state from an already-parsed JSON mapping.

        Only sections whose key is present in ``data_json`` are applied;
        attributes belonging to absent sections are left as they were.

        Args:
            data_json: Mapping that may contain "remove_columns",
                "missing_values", "categorical", "scale", "categorical_y",
                "scale_y" and "ml_task".
        """

        def _rebuild(into, make, specs):
            # One freshly created step per serialized entry, in stored order.
            for spec in specs:
                step = make()
                step.from_json(spec)
                into.append(step)

        if "remove_columns" in data_json:
            self._remove_columns = data_json["remove_columns"]

        if "missing_values" in data_json:
            self._missing_values = []
            _rebuild(
                self._missing_values,
                PreprocessingMissingValues,
                data_json["missing_values"],
            )

        if "categorical" in data_json:
            self._categorical = []
            _rebuild(
                self._categorical,
                PreprocessingCategorical,
                data_json["categorical"],
            )

        if "scale" in data_json:
            self._scale = []
            _rebuild(self._scale, Scale, data_json["scale"])

        if "categorical_y" in data_json:
            target_spec = data_json["categorical_y"]
            # The presence of "new_columns" selects the binarizer over the
            # plain label encoder.
            if "new_columns" in target_spec:
                self._categorical_y = LabelBinarizer()
            else:
                self._categorical_y = LabelEncoder()
            self._categorical_y.from_json(target_spec)

        if "scale_y" in data_json:
            self._scale_y = Scale()
            self._scale_y.from_json(data_json["scale_y"])

        if "ml_task" in data_json:
            self._params["ml_task"] = data_json["ml_task"]
Exemplo n.º 3
0
    def from_json(self, data_json):
        """Load preprocessing state from an already-parsed JSON mapping.

        Sections whose key is present in ``data_json`` replace the matching
        attribute; attributes of absent sections are left as they were, except
        ``_add_random_feature`` and ``_drop_features``, which are always
        assigned (defaulting to ``False`` and ``[]``).

        A dict stored under "params" is shallow-copied before use, so the
        later "ml_task" assignment does not write into the caller's payload.

        Args:
            data_json: Mapping that may contain "params", "remove_columns",
                "missing_values", "categorical", "datetime_transforms",
                "text_transforms", "golden_features", "scale",
                "categorical_y", "scale_y", "ml_task", "add_random_feature"
                and "drop_features".
        """

        def _load_steps(step_cls, payloads):
            # One freshly created step per serialized entry, in stored order.
            steps = []
            for payload in payloads:
                step = step_cls()
                step.from_json(payload)
                steps.append(step)
            return steps

        if "params" in data_json:
            loaded_params = data_json["params"]
            # Copy: "ml_task" may be written into self._params below, and that
            # must not leak back into the dict the caller handed in.
            if isinstance(loaded_params, dict):
                loaded_params = dict(loaded_params)
            self._params = loaded_params

        if "remove_columns" in data_json:
            self._remove_columns = data_json["remove_columns"]
        if "missing_values" in data_json:
            self._missing_values = _load_steps(
                PreprocessingMissingValues, data_json["missing_values"]
            )
        if "categorical" in data_json:
            self._categorical = _load_steps(
                PreprocessingCategorical, data_json["categorical"]
            )

        if "datetime_transforms" in data_json:
            self._datetime_transforms = _load_steps(
                DateTimeTransformer, data_json["datetime_transforms"]
            )

        if "text_transforms" in data_json:
            self._text_transforms = _load_steps(
                TextTransformer, data_json["text_transforms"]
            )

        if "golden_features" in data_json:
            self._golden_features = GoldenFeaturesTransformer()
            self._golden_features.from_json(data_json["golden_features"])

        if "scale" in data_json:
            self._scale = _load_steps(Scale, data_json["scale"])

        if "categorical_y" in data_json:
            target_spec = data_json["categorical_y"]
            # The presence of "new_columns" selects the binarizer over the
            # plain label encoder.
            if "new_columns" in target_spec:
                self._categorical_y = LabelBinarizer()
            else:
                self._categorical_y = LabelEncoder()
            self._categorical_y.from_json(target_spec)

        if "scale_y" in data_json:
            self._scale_y = Scale()
            self._scale_y.from_json(data_json["scale_y"])

        if "ml_task" in data_json:
            self._params["ml_task"] = data_json["ml_task"]

        self._add_random_feature = data_json.get("add_random_feature", False)
        self._drop_features = data_json.get("drop_features", [])