def setUpClass(cls):
    """Prepare the shared fixture: a BRLC instance, the cleaned data and a fitted model.

    Sets ``cls.sbrl_inst``, ``cls.input_data`` (features only) and ``cls.y``
    (the ``Survived`` column), then fits the model on the ``[1:50]`` slice.
    """
    cls.sbrl_inst = BRLC(min_rule_len=1, max_rule_len=2, iterations=10000, n_chains=3)

    frame = pd.read_csv('skater/tests/data/sample_data.csv')

    # Encode the categorical columns as integer codes, keeping the same
    # column insertion order as a hand-written, column-by-column version.
    for source_col, encoded_col in (("Sex", "Sex_Encoded"),
                                    ("Embarked", "Embarked_Encoded")):
        frame[source_col] = frame[source_col].astype('category')
        frame[encoded_col] = frame[source_col].cat.codes

    # Drop the free-text / already-encoded columns, then any row with NaN.
    frame = frame.drop(['Ticket', 'Cabin', 'Name', 'Sex', 'Embarked'], axis=1)
    frame = frame.dropna()

    # Separate the target from the features.
    cls.y = frame['Survived']
    cls.input_data = frame.drop(['Survived'], axis=1)

    # Train a model on a small slice of the data.
    passthrough_features = [
        "PassengerId", "Pclass", "SibSp", "Parch", "Sex_Encoded",
        "Embarked_Encoded"
    ]
    train_x = cls.input_data[1:50]
    train_y = cls.y[1:50]
    cls.sbrl_inst.fit(train_x, train_y,
                      undiscretize_feature_list=passthrough_features)
def setUp(self):
    """Build a fresh BRLC instance and the cleaned sample data for each test.

    Sets ``self.sbrl_inst``, ``self.input_data`` (features only) and
    ``self.y`` (the ``Survived`` column). No model is fitted here.
    """
    self.sbrl_inst = BRLC(min_rule_len=1, max_rule_len=2, iterations=10000, n_chains=3)

    frame = pd.read_csv('skater/tests/data/sample_data.csv')

    # Encode the categorical columns as integer codes, one pair at a time so
    # the resulting column order is Sex_Encoded followed by Embarked_Encoded.
    for source_col, encoded_col in (("Sex", "Sex_Encoded"),
                                    ("Embarked", "Embarked_Encoded")):
        frame[source_col] = frame[source_col].astype('category')
        frame[encoded_col] = frame[source_col].cat.codes

    # Drop the free-text / already-encoded columns, then any row with NaN.
    frame = frame.drop(['Ticket', 'Cabin', 'Name', 'Sex', 'Embarked'], axis=1)
    frame = frame.dropna()

    # Separate the target from the features.
    self.y = frame['Survived']
    self.input_data = frame.drop(['Survived'], axis=1)
class TestRuleList(unittest.TestCase):
    """Tests for the BRLC rule-list model, driven by the bundled sample CSV."""

    # Columns passed to ``fit`` as ``undiscretize_feature_list``.
    _UNDISCRETIZED_FEATURES = (
        "PassengerId", "Pclass", "SibSp", "Parch", "Sex_Encoded",
        "Embarked_Encoded",
    )
    # File written by ``test_model_save_load``.
    _MODEL_PATH = "test.pkl"

    def setUp(self):
        """Create a fresh BRLC instance and the cleaned feature/target data."""
        self.sbrl_inst = BRLC(min_rule_len=1, max_rule_len=2, iterations=10000, n_chains=3)
        self.input_data = pd.read_csv('skater/tests/data/sample_data.csv')

        # data transformation and cleaning ...
        self.input_data["Sex"] = self.input_data["Sex"].astype('category')
        self.input_data["Sex_Encoded"] = self.input_data["Sex"].cat.codes
        self.input_data["Embarked"] = self.input_data["Embarked"].astype('category')
        self.input_data["Embarked_Encoded"] = self.input_data["Embarked"].cat.codes
        self.input_data = self.input_data.drop(
            ['Ticket', 'Cabin', 'Name', 'Sex', 'Embarked'], axis=1)

        # Remove NaN values
        self.input_data = self.input_data.dropna()
        self.y = self.input_data['Survived']
        self.input_data = self.input_data.drop(['Survived'], axis=1)

    def _fit_model(self):
        """Fit ``self.sbrl_inst`` on the ``[1:50]`` slice of the prepared data."""
        self.sbrl_inst.fit(
            self.input_data[1:50], self.y[1:50],
            # A new list per call, so the shared tuple can never be mutated.
            undiscretize_feature_list=list(self._UNDISCRETIZED_FEATURES))

    @staticmethod
    def _remove_if_exists(path):
        """Delete ``path`` if it is present; do nothing otherwise."""
        import os

        if os.path.exists(path):
            os.remove(path)

    def test_discretizer(self):
        """Discretizing ``Age`` yields a non-empty ``Age_q_label`` column."""
        new_df = self.sbrl_inst.discretizer(self.input_data, column_list=["Age"])
        self.assertGreater(new_df["Age_q_label"].shape[0], 0)

    def test_model_build(self):
        """A fitted model returns scores and labels of the expected shape."""
        self._fit_model()

        new_data = self.sbrl_inst.discretizer(self.input_data, column_list=["Age", "Fare"])
        result_score = self.sbrl_inst.predict_proba(new_data)
        result_labels = self.sbrl_inst.predict(new_data)

        # make sure shape of the dataframe is as expected
        self.assertEqual(result_score.shape, (77, 2))
        self.assertEqual(result_labels[1].shape, (77, ))

        generated_labels = np.unique(result_labels[1])
        expected_labels = np.array([0, 1])
        self.assertTrue(np.array_equal(generated_labels, expected_labels))

    def test_model_save_load(self):
        """A persisted model can be loaded back after the instance is cleared."""
        self._fit_model()

        # Registered before saving so the file is removed even if a later
        # step in this test fails.
        self.addCleanup(self._remove_if_exists, self._MODEL_PATH)
        self.sbrl_inst.save_model(self._MODEL_PATH, compress=True)

        # Explicitly assigning the model instance to 'None' to validate loading of persisted model
        # Care is advised when handling the model instance, it might make the model unstable
        self.sbrl_inst.model = None
        self.assertIsNone(self.sbrl_inst.model)

        self.sbrl_inst.load_model(self._MODEL_PATH)
        self.assertIsNotNone(self.sbrl_inst.model)

    def test_model_output(self):
        """Accessing the rule range '23:25' returns two entries."""
        self._fit_model()
        result = self.sbrl_inst.access_learned_rules('23:25')
        self.assertEqual(len(result), 2)

    @unittest.skip(
        "Support for computing validation curve for SBRL is still under development"
    )
    def test_validation(self):
        """Validation curve returns one row of scores per parameter value."""
        param_range = [3, 4]
        train_scores, test_scores = compute_validation_curve(
            self.sbrl_inst, n_folds=2, x=self.input_data, y=self.y,
            param_name="rule_minlen", param_range=param_range)
        self.assertEqual(train_scores.shape[0], 2)
        self.assertEqual(test_scores.shape[0], 2)