示例#1
0
 def test_classify(self):
     """ada_boost - classify build classifications from stumps

     Trains on the larger fixture (the third ``train_dataset`` argument
     is 9; presumably the iteration cap -- confirm against ``ada_boost``)
     and checks that the point ``[1, 0.5]`` is classified as ``-1``.
     """
     classifiers, _ = ada_boost.train_dataset(self.larger_matrix,
                                              self.larger_class_labels,
                                              9)
     data_to_classify = [1, 0.5]
     classifications = ada_boost.classify(data_to_classify, classifiers)
     expected = np.mat([-1.])
     # assertEqual evaluates ``not (first == second)``; on numpy data that
     # only works for single-element arrays and raises ValueError for
     # anything larger. The numpy helper checks shape and values and
     # reports the mismatching entries in its AssertionError.
     np.testing.assert_array_equal(classifications, expected)
示例#2
0
 def test_classify(self):
     """ada_boost - classify build classifications from stumps

     Builds classifiers from the larger fixture via ``train_dataset``
     (called with 9 as its third argument) and verifies that
     ``classify`` labels the sample ``[1, 0.5]`` as ``-1``.
     """
     trained = ada_boost.train_dataset(self.larger_matrix,
                                       self.larger_class_labels,
                                       9)
     # train_dataset returns a (classifiers, estimates) pair; only the
     # classifiers are needed here.
     classifiers, _ = trained
     classifications = ada_boost.classify([1, 0.5], classifiers)
     # Compare with numpy's own assertion: unittest's assertEqual depends
     # on the truth value of ``a == b``, which is ambiguous (ValueError)
     # as soon as the result holds more than one element.
     np.testing.assert_array_equal(classifications, np.mat([-1.]))
示例#3
0
def test_classification():
    """Smoke-run stump building, boosting and classification on simple data.

    Logs the stump, minimum error and best estimate returned by
    ``ada_boost.build_stump`` under uniform weights, then trains with
    ``ada_boost.train_dataset`` (third argument 9) and logs the
    classification of the point ``[0, 0]``. Nothing is asserted.
    """
    data_matrix, class_labels = load_simple_data()
    # Uniform weights sized from the loaded data rather than a hard-coded 5.
    # NOTE(review): assumes each row of data_matrix is one sample -- confirm
    # against load_simple_data.
    sample_count = np.shape(data_matrix)[0]
    D = np.mat(np.ones((sample_count, 1)) / sample_count)
    stump, min_error, best_estimate = ada_boost.build_stump(data_matrix,
                                                            class_labels,
                                                            D)
    logging.info('stump: {}'.format(stump))
    logging.info('min_error: {}'.format(min_error))
    logging.info('best_estimate: {}'.format(best_estimate))
    classifier_array, _ = ada_boost.train_dataset(data_matrix,
                                                  class_labels,
                                                  9)
    data_to_classify = [0, 0]
    classifications = ada_boost.classify(data_to_classify, classifier_array)
    logging.info("classifications: {c}".format(c=classifications))
示例#4
0
def test_horse_colic():
    """Train on the horse colic training file and log the test error rate.

    Trains with ``ada_boost.train_dataset`` (third argument 10), classifies
    the contents of ``data/horseColicTest.txt`` and logs how many
    predictions differ from the test labels, both as a count and as a
    fraction of the number of labels. Nothing is asserted.
    """
    data, labels = utils.load_tsv_datafile('data/horseColicTraining.txt')
    classifier_array, _ = ada_boost.train_dataset(data, labels, 10)

    test_data, test_labels = utils.load_tsv_datafile('data/horseColicTest.txt')
    prediction10 = ada_boost.classify(test_data, classifier_array)

    # calculate the error
    actual_labels = np.mat(test_labels).T
    # Take the sample count from the label column instead of a hard-coded
    # 67, so the boolean mask below always matches err_array's shape.
    elements = actual_labels.shape[0]
    err_array = np.mat(np.ones((elements, 1)))
    # Selecting the ones at mispredicted positions and summing them counts
    # the errors (as a float).
    error_count = err_array[prediction10 != actual_labels].sum()
    message = "total_errors = {total_errors}; error rate= {rate}".\
        format(total_errors=error_count, rate=(error_count / elements))
    logging.info(message)
示例#5
0
 def test_train_dataset(self):
     """ada_boost - train_dataset trains the dataset

     Checks the classifier list returned for the larger fixture against
     three expected stump descriptions.
     """
     # (alpha, dim, threshold) per expected stump; every stump uses 'lt'.
     stump_table = (
         (0.6931471805599453, 0, 1.3),
         (0.9729550745276565, 1, 1.0),
         (0.8958797346140273, 0, 0.90000000000000002),
     )
     expected = [
         {'alpha': alpha,
          'dim': dim,
          'inequal': 'lt',
          'threshold': threshold}
         for alpha, dim, threshold in stump_table
     ]
     classifiers, _ = ada_boost.train_dataset(self.larger_matrix,
                                              self.larger_class_labels,
                                              9)
     self.assertEqual(classifiers, expected)
示例#6
0
 def test_train_dataset(self):
     """ada_boost - train_dataset trains the dataset

     Trains on the larger fixture and compares the returned classifiers
     with the three stumps written out below.
     """
     trained = ada_boost.train_dataset(self.larger_matrix,
                                       self.larger_class_labels,
                                       9)
     # Only the classifiers are checked; the estimates are discarded.
     classifiers = trained[0]

     first_stump = {'alpha': 0.6931471805599453, 'dim': 0,
                    'inequal': 'lt', 'threshold': 1.3}
     second_stump = {'alpha': 0.9729550745276565, 'dim': 1,
                     'inequal': 'lt', 'threshold': 1.0}
     third_stump = {'alpha': 0.8958797346140273, 'dim': 0,
                    'inequal': 'lt', 'threshold': 0.90000000000000002}

     self.assertEqual(classifiers,
                      [first_stump, second_stump, third_stump])
示例#7
0
def test_compute_horse_auc():
    """Train on the horse colic training data and plot its ROC curve.

    Runs ``ada_boost.train_dataset`` with 40 as its third argument and
    passes the transposed class estimates, together with the training
    labels, to ``ada_boost.plot_roc``.
    """
    training_path = 'data/horseColicTraining.txt'
    features, targets = utils.load_tsv_datafile(training_path)
    # The classifiers themselves are not needed for the plot.
    _, class_estimates = ada_boost.train_dataset(features, targets, 40)
    ada_boost.plot_roc(class_estimates.T, targets)