def test_regard_as(self):
    """
    A manual variable-type override holds only until the inspection is recomputed.
    """
    inspector = Inspector(pd.read_csv(self.test_data), m_cats=20)

    def age_variable_type():
        ## Current "variable" entry of the "age" row in the inspection result.
        return inspector.result.loc["age", "variable"]

    ## Freshly inspected with m_cats=20, "age" is reported as continuous.
    self.assertEqual(age_variable_type(), VariableType.continuous.name)

    ## A manual override turns it into a categorical variable.
    inspector.regard_as_categorical("age")
    self.assertEqual(age_variable_type(), VariableType.categorical.name)

    ## Assigning m_cats executes the inspection logic again, so the manual
    ## override is lost and "age" is continuous once more.
    inspector.m_cats = 21
    self.assertEqual(age_variable_type(), VariableType.continuous.name)
#
# 1. `constant`: `n_unique==1`.
# 2. `binary`: it can take only two values. `n_unique==2`
# 3. `categorical`: it can take a finite number of values. `dtype == "object"` or `n_unique <= m_cats`
# 4. `continuous`: it can take real-number or timestamp values.
#
# Remarks:
#
# - We do not care whether the values are ordered: a nominal variable and an ordinal variable are both just categorical variables.
# - We regard a binary variable as a special case of a categorical variable.
# - A constant variable is neither categorical nor continuous.
# - The data type `datetime` is always regarded as a continuous variable, even though it can actually be a categorical variable (such as year-month).
#
# If you want to change the result, you can modify `m_cats` or use `regard_as_categorical()` or `regard_as_continuous()`.

inspector.regard_as_categorical("age")
inspector.result.loc["age", :] ## The variable type of age is categorical

# If we assign a number to `m_cats`, then the inspection is computed again. As a result, your manual modifications of variable types will be lost.

inspector.m_cats = 20
inspector.result.query( "dtype == 'int64'") ## The variable type of age is now continuous.

# If you want to compute the inspection once again because you converted a column, then `make_an_inspection()` does the job.

inspector.make_an_inspection()

# We can easily get the list of categorical/continuous variables. (Note that a constant variable is neither categorical nor continuous.)

print(inspector.get_cats()