def test_featureextractiontext_bow(self):
    """Check the rows left in ``x_train`` after ``bag_of_words(keep_col=False)``.

    A two-sentence corpus is loaded into a single ``text`` column, the
    bag-of-words transform is applied without keeping the source column,
    and the resulting ``x_train`` values are compared against the expected
    per-sentence rows.
    """
    corpus = ["Hi my name is pyml", "Hi name pyml"]
    frame = pd.DataFrame(corpus, columns=["text"])

    dataset = Data(
        x_train=frame,
        test_split_percentage=0.5,
        split=False,
        report_name="test",
    )
    dataset.bag_of_words(keep_col=False)

    # NOTE(review): these rows are consistent with one column per token in
    # alphabetical order (hi, is, my, name, pyml) -- confirm against the
    # bag_of_words implementation.
    expected_rows = [[1, 1, 1, 1, 1], [1, 0, 0, 1, 1]]
    actual_rows = dataset.x_train.values.tolist()

    self.assertListEqual(actual_rows, expected_rows)
def test_report_feature_bow(self):
    """Check that running ``bag_of_words`` writes an entry to the report file.

    The test builds a ``Data`` object with ``report_name="test"``, runs the
    bag-of-words transform, then reads the file at ``report.filename`` and
    expects the word "representation" to appear in it.
    """
    list_of_sentences = ["Hi my name is pyml", "Hi name pyml"]
    data = pd.DataFrame(list_of_sentences, columns=["text"])

    feature = Data(
        x_train=data,
        test_split_percentage=0.5,
        split=False,
        report_name="test",
    )
    feature.bag_of_words()

    report_path = feature.report.filename
    with open(report_path) as f:
        content = f.read()

    # Remove the report before asserting so a failed assertion does not
    # leave the generated file behind for later tests.
    os.remove(report_path)

    # assertIn reports the missing substring and the file contents on
    # failure, unlike assertTrue on a precomputed boolean.
    self.assertIn("representation", content)