def test_facet_grid_scatter_plot(self):
    """Smoke test: facet_grid_scatter_plot runs with ``c`` and with ``hue``.

    Nothing is asserted; the test passes when neither call raises.
    """
    ## Fix the seed so that the random facet labels are reproducible.
    np.random.seed(1)
    iris = load_iris(target="species")
    n_obs = iris.shape[0]

    ## Random facet labels: the column labels are drawn before the row labels.
    iris["grid_col"] = np.random.choice(["c1", "c2"], size=n_obs, replace=True)
    iris["grid_row"] = np.random.choice(["r1", "r2", "r3"], size=n_obs, replace=True)

    ## Arguments common to both calls.
    shared = dict(
        data=iris,
        row="grid_row",
        col="grid_col",
        x="petal_width",
        y="petal_length",
    )

    ## colouring by a continuous field
    facet_grid_scatter_plot(c="sepal_width", **shared)
    ## colouring by a categorical field
    facet_grid_scatter_plot(hue="species", **shared)
def test_load_iris(self):
    """unittest for load_iris

    Checks the type and shape of the returned table, that the target
    column is the last one, and that it holds exactly the three species.
    """
    target = "species"
    cats = {"setosa", "versicolor", "virginica"}

    df = load_iris(target=target)

    ## assertIsInstance reports the actual type on failure,
    ## unlike assertTrue(isinstance(...)).
    self.assertIsInstance(df, pd.DataFrame)
    self.assertEqual((150, 5), df.shape)
    ## the target variable must be the last column
    self.assertEqual(target, list(df.columns)[-1])
    self.assertEqual(cats, set(df[target].unique()))
def setUpClass(cls) -> None:
    """Load the datasets and fit the GridSearchCV fixtures stored on ``cls``.

    NOTE(review): no ``@classmethod`` decorator is visible in this view;
    confirm that it is present in the file.
    NOTE(review): ``iid=`` (GridSearchCV) and ``normalize=`` (ElasticNet)
    look version-sensitive; confirm against the pinned scikit-learn.
    """
    ## The statement order matters: shuffling (and possibly fitting)
    ## consumes the global random state seeded here.
    np.random.seed(1)

    ## iris (multi-class classification)
    iris = load_iris(target="label")
    iris = iris.sample(frac=1, replace=False)
    cls.iris_X = iris.drop("label", axis=1)
    cls.iris_y = iris["label"]
    iris_estimator = LogisticRegression(solver="liblinear", multi_class="auto")
    cls.iris_plr = GridSearchCV(
        iris_estimator,
        param_grid={"C": [0.1, 1]},
        cv=3,
        iid=False,
        return_train_score=True,
    )
    cls.iris_plr.fit(cls.iris_X, cls.iris_y)

    ## boston dataset (regression)
    target = "price"
    boston = load_boston(target=target)
    boston = boston.sample(frac=1, replace=False)
    cls.boston_X = boston.drop(target, axis=1)
    cls.boston_y = boston[target]

    tree_grid = {"max_depth": [3, 5, 7]}
    cls.boston_tree = GridSearchCV(
        DecisionTreeRegressor(),
        param_grid=tree_grid,
        cv=5,
        scoring="neg_mean_squared_error",
        iid=False,
        return_train_score=False,
    )
    cls.boston_tree.fit(cls.boston_X, cls.boston_y)

    enet_grid = {"alpha": [0.1, 1.0], "l1_ratio": [0.1, 0.5, 0.9]}
    cls.boston_enet = GridSearchCV(
        ElasticNet(normalize=True),
        param_grid=enet_grid,
        cv=3,
        scoring="neg_mean_squared_error",
        iid=False,
        return_train_score=True,
    )
    cls.boston_enet.fit(cls.boston_X, cls.boston_y)

    ## breast (binary classification); this table is not shuffled
    breast = load_breast_cancer("label")
    cls.breast_cancer_X = breast.drop("label", axis=1)
    cls.breast_cancer_y = breast["label"]
    breast_estimator = LogisticRegression(solver="liblinear")
    cls.breast_cancer_plr = GridSearchCV(
        breast_estimator,
        param_grid={"C": [0.1, 1]},
        cv=3,
    )
    cls.breast_cancer_plr.fit(cls.breast_cancer_X, cls.breast_cancer_y)
def test_to_excel(self):
    """unittest for to_excel

    Writes three tables into one workbook (new file, additional sheet,
    overwritten sheet) and checks the sheet names and the table read
    back through pandas after each write.
    """
    df_iris = load_iris()
    df_boston = load_boston()
    df_cancer = load_breast_cancer().loc[:10, :]

    from openpyxl import load_workbook

    sheets = ["iris", "boston"]

    with TempDir() as temp_dir:
        excel_file = temp_dir.joinpath("new_file.xlsx")

        def write_and_verify(table, sheet, libreoffice, expected_sheets):
            ## Write the table, then check the workbook and the round trip.
            to_excel(df=table, file=excel_file, sheet=sheet, libreoffice=libreoffice)

            workbook = load_workbook(str(excel_file))
            self.assertEqual(expected_sheets, workbook.sheetnames)

            ## whether we can read the excel through pandas.read_excel
            restored = pd.read_excel(excel_file, sheet_name=sheet)
            self.assertEqual(table.columns.tolist(), restored.columns.tolist())
            self.assertEqual(table.shape, restored.shape)

        ## Case 1) Create a new file
        write_and_verify(df_iris, sheets[0], True, [sheets[0]])

        ## Case 2) Add a sheet to an existing excel file
        write_and_verify(df_boston, sheets[1], True, sheets)

        ## Case 3) Overwrite an existing sheet with a new table
        ## This table has no style.
        write_and_verify(df_cancer, sheets[0], False, sheets)
def test_bins_heatmap(self):
    """Smoke test: bins_heatmap runs without raising.

    Nothing is asserted about the produced figure.
    """
    ## Fix the seed so that the random category is reproducible.
    np.random.seed(2)
    iris = load_iris(target="species")

    ## an artificial second categorical field
    random_labels = np.random.choice(["a", "b"], size=iris.shape[0], replace=True)
    iris["cat1"] = random_labels

    bins_heatmap(
        iris,
        cat1="cat1",
        cat2="species",
        x="petal_width",
        y="petal_length",
        target="sepal_length",
        n_bins=3,
    )
def test_bins_by_tree(self):
    """unittest for bins_by_tree

    For a continuous and a non-continuous target, the result must be a
    Series with ``n_bins`` distinct right-closed intervals whose outer
    bounds are -inf and +inf.
    """
    iris = load_iris(target="species")
    columns = list(iris.columns)
    n_bins = 3

    ## (target column, target_is_continuous)
    scenarios = [
        (columns[3], True),    ## Case) The target variable is continuous
        (columns[-1], False),  ## Case) The target variable is not continuous
    ]

    for target, is_continuous in scenarios:
        bins = bins_by_tree(
            iris,
            field=columns[2],
            target=target,
            target_is_continuous=is_continuous,
            n_bins=n_bins,
            n_points=200,
            precision=1,
        )
        distinct = bins.unique()
        intervals = sorted(distinct)

        self.assertIsInstance(bins, pd.Series)
        self.assertEqual(len(distinct), n_bins)

        for interval in intervals:
            self.assertIsInstance(interval, pd.Interval)
            self.assertEqual(interval.closed, "right")

        ## the bins must be unbounded on both sides
        lowest, highest = intervals[0], intervals[-1]
        self.assertEqual(lowest.left, -np.inf)
        self.assertEqual(highest.right, np.inf)
def test_visualize_two_fields(self):
    """Smoke test: visualize_two_fields runs for each pairing of field types.

    Nothing is asserted; the test passes when no call raises.
    """
    np.random.seed(1)
    iris = load_iris(target="species")
    ## an artificial second categorical field
    iris["cat"] = np.random.choice(["a","b","c"], size=iris.shape[0], replace=True)

    inspector = Inspector(iris)

    field_pairs = [
        ("sepal_width", "sepal_length"),  ## continuous x continuous
        ("sepal_length", "species"),      ## continuous x categorical
        ("species", "petal_width"),       ## categorical x continuous
        ("species", "cat"),               ## categorical x categorical
    ]
    for first, second in field_pairs:
        inspector.visualize_two_fields(first, second)
try: from adhoc.processing import Inspector from adhoc.modeling import show_tree from adhoc.utilities import load_iris, facet_grid_scatter_plot, bins_heatmap except ImportError: import sys sys.path.append("..") from adhoc.processing import Inspector from adhoc.modeling import show_tree from adhoc.utilities import load_iris, facet_grid_scatter_plot, bins_heatmap # + np.random.seed(1) df = load_iris(target="species") df["cat1"] = np.random.choice(["a", "b", "c"], size=df.shape[0], replace=True) df["cat2"] = (df.iloc[:, 0] * df.iloc[:, 1] - df.iloc[:, 2] * df.iloc[:, 3] > 11).map({ True: 1, False: 0 }) inspector = Inspector(df) inspector ## 4 continuous variables and 3 categorical variables # - inspector.visualize_two_fields("sepal_width", "sepal_length") ## continuous x continuous inspector.visualize_two_fields("petal_width",