def test_to_and_from_json_booleans(self):
    # Round-trip two fitted binarizers through their JSON form (one fitted
    # on a string column, one on a boolean column) and check that the
    # restored objects transform unseen rows as expected.

    # training data: both columns hold exactly two distinct values
    train_df = pd.DataFrame(
        data={"col1": ["a", "a", "c"], "col2": [True, True, False]}
    )

    # fit one binarizer per column
    fitted_str = LabelBinarizer()
    fitted_str.fit(train_df, "col1")
    fitted_bool = LabelBinarizer()
    fitted_bool.fit(train_df, "col2")

    # test data; "col3" is never handed to a binarizer
    test_df = pd.DataFrame(
        data={
            "col1": ["c", "c", "a"],
            "col2": [False, False, True],
            "col3": [2, 3, 4],
        }
    )

    # restore from JSON; the boolean binarizer additionally goes through a
    # real json.dumps/json.loads cycle, so its parameters must survive
    # text serialization
    restored_str = LabelBinarizer()
    restored_bool = LabelBinarizer()
    restored_str.from_json(fitted_str.to_json())
    serialized = json.dumps(fitted_bool.to_json(), indent=4)
    restored_bool.from_json(json.loads(serialized))

    # transform with the restored binarizers
    test_df = restored_str.transform(test_df, "col1")
    test_df = restored_bool.transform(test_df, "col2")

    # string column: a single indicator column replaces the original,
    # and it sums to 2 because two test rows hold "c"
    self.assertIn("col1_c", test_df.columns)
    self.assertNotIn("col1", test_df.columns)
    self.assertEqual(2, np.sum(test_df["col1_c"]))

    # boolean column: a single indicator column replaces the original,
    # and it sums to 1 because one test row holds True
    self.assertIn("col2_True", test_df.columns)
    self.assertNotIn("col2", test_df.columns)
    self.assertEqual(1, np.sum(test_df["col2_True"]))

    # the untouched continuous attribute must still be present
    self.assertIn("col3", test_df.columns)
def inverse_transform(self, X):
    """Reverse the stored per-column categorical conversions on ``X``.

    Iterates over ``self._convert_params`` (column name -> serialized
    converter parameters). Parameters that contain both ``"unique_values"``
    and ``"new_columns"`` are loaded into a ``LabelBinarizer``; all others
    are loaded into a ``LabelEncoder``.

    Args:
        X: Data to convert back. Integer-encoded columns are overwritten
            through ``X.loc``; for one-hot columns ``X`` is rebound to
            whatever the binarizer returns.

    Returns:
        ``X`` after every stored conversion has been reversed.
    """
    for col_name, params in self._convert_params.items():
        is_one_hot = "unique_values" in params and "new_columns" in params

        if not is_one_hot:
            # integer-encoded column: decode the values and write them back
            encoder = LabelEncoder()
            encoder.from_json(params)
            decoded = encoder.inverse_transform(X.loc[:, col_name])
            X.loc[:, col_name] = decoded
            continue

        # one-hot encoded column
        # NOTE(review): the original author marked this call with
        # "should raise exception" -- presumably LabelBinarizer does not
        # support inversion; confirm against its implementation.
        binarizer = LabelBinarizer()
        binarizer.from_json(params)
        X = binarizer.inverse_transform(X, col_name)

    return X
def transform(self, X):
    """Apply the fitted categorical conversion to ``X``.

    When the convert method is ``PreprocessingCategorical.CONVERT_LOO`` and
    ``self._columns`` is non-empty, the whole job is delegated to
    ``self._enc.transform``. Otherwise each entry of
    ``self._convert_params`` (column name -> serialized converter
    parameters) is applied in turn: parameters containing both
    ``"unique_values"`` and ``"new_columns"`` are loaded into a
    ``LabelBinarizer``, everything else into a ``LabelEncoder``.

    Args:
        X: Data to convert. Integer-encoded columns are overwritten through
            ``X.loc``; for one-hot columns ``X`` is rebound to whatever the
            binarizer returns.

    Returns:
        The converted data.
    """
    delegate_to_encoder = (
        self._convert_method == PreprocessingCategorical.CONVERT_LOO
        and self._columns
    )
    if delegate_to_encoder:
        return self._enc.transform(X)

    for col_name, params in self._convert_params.items():
        is_one_hot = "unique_values" in params and "new_columns" in params

        if is_one_hot:
            # convert to one hot
            binarizer = LabelBinarizer()
            binarizer.from_json(params)
            X = binarizer.transform(X, col_name)
            continue

        # convert to integer and write the codes back into the same column
        encoder = LabelEncoder()
        encoder.from_json(params)
        encoded = encoder.transform(X.loc[:, col_name])
        X.loc[:, col_name] = encoded

    return X