def test_classify(self):
    """ada_boost - classify build classifications from stumps

    Trains classifiers on the fixture data (``self.larger_matrix`` /
    ``self.larger_class_labels``) and checks that the single point
    ``[1, 0.5]`` is classified as ``-1``.
    """
    classifiers, _estimates = ada_boost.train_dataset(
        self.larger_matrix, self.larger_class_labels, 9)
    data_to_classify = [1, 0.5]
    classifications = ada_boost.classify(data_to_classify, classifiers)
    expected = np.mat([-1.])
    # Compare element-wise with numpy's own assertion: unittest's assertEqual
    # relies on the truth value of ``a == b``, which is only defined for
    # one-element arrays and reports failures poorly.
    np.testing.assert_array_equal(classifications, expected)
def test_classification():
    """Smoke-run stump building, training and classification on the simple data.

    Nothing is asserted; every intermediate result is written to the log.
    """
    data_matrix, class_labels = load_simple_data()
    # Five equal weights of 1/5 each, as a 5x1 column.
    weights = np.mat(np.ones((5, 1)) / 5)

    stump, min_error, best_estimate = ada_boost.build_stump(
        data_matrix, class_labels, weights)
    for template, value in (
            ('stump: {}', stump),
            ('min_error: {}', min_error),
            ('best_estimate: {}', best_estimate)):
        logging.info(template.format(value))

    trained = ada_boost.train_dataset(data_matrix, class_labels, 9)
    classifier_array, aggregated_class_estimates = trained

    point = [0, 0]
    classifications = ada_boost.classify(point, classifier_array)
    logging.info("classifications: {c}".format(c=classifications))
def test_horse_colic():
    """Train on the horse colic training file and log the test-set error rate.

    Classifiers are trained with ``ada_boost.train_dataset`` (third argument
    10, presumably the iteration count), the rows of the test file are
    classified, and the number of predictions that differ from the test
    labels is logged together with that count divided by the number of
    test rows. Nothing is asserted.
    """
    data, labels = utils.load_tsv_datafile('data/horseColicTraining.txt')
    classifier_array, _aggregated_class_estimates = ada_boost.train_dataset(
        data, labels, 10)
    test_data, test_labels = utils.load_tsv_datafile('data/horseColicTest.txt')
    prediction10 = ada_boost.classify(test_data, classifier_array)

    # calculate the error
    # Size the mask from the labels actually loaded rather than a hard-coded
    # 67, so the mask shape and the rate stay right if the file changes.
    label_column = np.mat(test_labels).T
    elements = label_column.shape[0]
    err_array = np.mat(np.ones((elements, 1)))
    # Boolean-mask the column of ones by mismatches; the sum is their count.
    error_count = err_array[prediction10 != label_column].sum()
    message = "total_errors = {total_errors}; error rate= {rate}".format(
        total_errors=error_count, rate=(error_count / elements))
    logging.info(message)
def test_train_dataset(self):
    """ada_boost - train_dataset trains the dataset and returns estimates

    Checks the list of classifier dicts returned for the fixture data.
    Keys and non-float fields must match exactly; float fields are
    compared with a tolerance.
    """
    classifiers, _estimates = ada_boost.train_dataset(
        self.larger_matrix, self.larger_class_labels, 9)
    expected = [
        {'alpha': 0.6931471805599453, 'dim': 0,
         'inequal': 'lt', 'threshold': 1.3},
        {'alpha': 0.9729550745276565, 'dim': 1,
         'inequal': 'lt', 'threshold': 1.0},
        {'alpha': 0.8958797346140273, 'dim': 0,
         'inequal': 'lt', 'threshold': 0.90000000000000002},
    ]
    self.assertEqual(len(classifiers), len(expected))
    for actual, wanted in zip(classifiers, expected):
        # Same key set, so extra or missing fields still fail the test.
        self.assertEqual(sorted(actual), sorted(wanted))
        for key, value in wanted.items():
            if isinstance(value, float):
                # Computed floats can differ in the last bits between
                # platforms; exact equality would make the test flaky.
                self.assertAlmostEqual(actual[key], value)
            else:
                self.assertEqual(actual[key], value)
def test_train_dataset(self):
    """ada_boost - train_dataset trains the dataset and returns estimates

    Verifies each returned classifier dict against the expected values:
    ``dim`` and ``inequal`` exactly, ``alpha`` and ``threshold`` within a
    floating-point tolerance.
    """
    classifiers, _estimates = ada_boost.train_dataset(
        self.larger_matrix, self.larger_class_labels, 9)
    expected = [{
        'alpha': 0.6931471805599453,
        'dim': 0,
        'inequal': 'lt',
        'threshold': 1.3
    }, {
        'alpha': 0.9729550745276565,
        'dim': 1,
        'inequal': 'lt',
        'threshold': 1.0
    }, {
        'alpha': 0.8958797346140273,
        'dim': 0,
        'inequal': 'lt',
        'threshold': 0.90000000000000002
    }]
    self.assertEqual(len(classifiers), len(expected))
    for index, (got, want) in enumerate(zip(classifiers, expected)):
        label = 'classifier #{}'.format(index)
        # Key sets must agree so unexpected or missing fields are caught.
        self.assertEqual(set(got), set(want), label)
        self.assertEqual(got['dim'], want['dim'], label)
        self.assertEqual(got['inequal'], want['inequal'], label)
        # Float results are compared approximately: bit-exact equality on
        # computed values is sensitive to platform and math-library details.
        self.assertAlmostEqual(got['alpha'], want['alpha'], msg=label)
        self.assertAlmostEqual(got['threshold'], want['threshold'], msg=label)
def test_compute_horse_auc():
    """Train on the horse colic training file and plot its ROC curve.

    Trains with 40 as the third argument to ``ada_boost.train_dataset`` and
    passes the transposed aggregated class estimates, together with the
    training labels, to ``ada_boost.plot_roc``. Nothing is asserted.
    """
    training_path = 'data/horseColicTraining.txt'
    features, targets = utils.load_tsv_datafile(training_path)
    trained = ada_boost.train_dataset(features, targets, 40)
    _classifier_array, class_estimates = trained
    ada_boost.plot_roc(class_estimates.T, targets)