Example #1
import numpy as np
from sklearn.metrics import confusion_matrix


def analyze_features(best_features='best_weak_learners_200.pkl', features_file='all_predict_features.pkl',
                     num_pos=4916, num_neg=7960, top=None):
    """
    :param best_features: file containing a list of the best classifiers selected by adaboost
    :param features_file: file containing a matrix of predicted features
    :param num_pos: number of positive images in the training set
    :param num_neg: number of negative images in the training set
    :param top: evaluate only the first `top` weak learners (defaults to all of them)
    :return: arrays of accuracy rates and false positive rates, one entry per ensemble size
    """
    best_features = pickle_load(best_features)
    predict_features = pickle_load(features_file)

    sum_of_alphas = 0
    n_estimators = len(best_features)

    labels = np.zeros(num_pos+num_neg, dtype=np.int8)
    labels[:num_pos] = 1

    if top is None:
        top = n_estimators

    accuracy_rates = np.empty(top, dtype=np.float32)
    false_positive_rates = np.empty(top, dtype=np.float32)

    for i, estimator in enumerate(best_features[:top]):
        min_feature_num, min_error, error_rate, threshold, parity, alpha = estimator

        sum_of_alphas += alpha
        pred_features = np.ravel(predict_features[min_feature_num].todense())

        if i == 0:
            combined_threshold = pred_features * alpha
        else:
            combined_threshold += pred_features * alpha

        pred = combined_threshold > sum_of_alphas / 2

        cmat = confusion_matrix(labels, pred)

        # compute the false positive rate of the strong classifier built from
        # the first i+1 weak learners selected by Adaboost
        # (confusion_matrix puts true labels on rows, so FP = cmat[0, 1] and TN = cmat[0, 0])
        false_positive_rates[i] = cmat[0, 1] / (cmat[0, 0] + cmat[0, 1])

        # compute the accuracy of the same strong classifier
        accuracy_rates[i] = (cmat[0, 0] + cmat[1, 1]) / len(pred)

    return accuracy_rates, false_positive_rates
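
A minimal driver for the function above, assuming the default pickle files are present; it evaluates the first 50 weak learners and plots how accuracy and false positive rate change as learners are added (the plotting is not part of the original module):

import matplotlib.pyplot as plt

# hypothetical usage: the file names fall back to the defaults in the signature
accuracy, fpr = analyze_features(top=50)

plt.plot(accuracy, label='accuracy')
plt.plot(fpr, label='false positive rate')
plt.xlabel('number of weak learners in the strong classifier')
plt.legend()
plt.show()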
Example #2
import numpy as np
from sklearn.metrics import confusion_matrix


def analyze_features(best_features='adaboost300.pkl', features_file='pred_features.pkl',
                     num_pos=4916, num_neg=7960):
    best_features = pickle_load(best_features)
    features = pickle_load(features_file)

    sum_of_alphas = 0
    n_estimators = len(best_features)

    labels = np.zeros(num_pos+num_neg, dtype=np.int8)
    labels[:num_pos] = 1

    accuracy_rates = np.empty(n_estimators, dtype=np.float32)
    false_positive_rates = np.empty(n_estimators, dtype=np.float32)

    for i, estimator in enumerate(best_features):
        min_feature_num, min_error, error_rate, threshold, parity, alpha = estimator

        sum_of_alphas += alpha

        if i == 0:
            combined_threshold = features[min_feature_num] * alpha
        else:
            combined_threshold += features[min_feature_num] * alpha

        pred = combined_threshold > sum_of_alphas / 2

        cmat = confusion_matrix(labels, pred)

        # compute the false positive rate of the strong classifier built from
        # the first i+1 weak learners selected by Adaboost
        false_positive_rates[i] = cmat[0, 1] / (cmat[0, 0] + cmat[0, 1])

        # compute the accuracy of the same strong classifier
        accuracy_rates[i] = (cmat[0, 0] + cmat[1, 1]) / len(pred)

    return accuracy_rates, false_positive_rates
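
Both examples call a pickle_load helper that this page does not show. A plausible minimal definition, assuming the files were written with pickle.dump:

import pickle


def pickle_load(filename):
    # read a single pickled object back from disk
    with open(filename, 'rb') as f:
        return pickle.load(f)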
Example #3
import os
import time

import numpy as np


def feature_train(data_path, weak_learner_file='all_weak_learners.pkl',
                  features_file='predict_error.pkl', num_pos=4916, num_neg=7960, n_estimators=200):

    start = time.time()

    print('Starting Adaboost training ...')

    parent_dir = os.getcwd()
    os.chdir(data_path)

    weak_learners = pickle_load(weak_learner_file)
    features = pickle_load(features_file)

    os.chdir(parent_dir)

    # setup the initial weights (Viola-Jones: half the total weight on each class)
    data_weights = np.empty(num_pos+num_neg, dtype=np.float32)
    data_weights[:num_pos] = 1 / (2 * num_pos)
    data_weights[num_pos:] = 1 / (2 * num_neg)
    labels = np.zeros(num_pos+num_neg, dtype=np.int8)
    labels[:num_pos] = 1

    best_weak_learners = []

    for i in range(n_estimators):
        # find the best weak learner by computing the minimum error
        round_start = time.time()

        error = np.dot(features, data_weights)

        # select the best weak learner
        min_feature_num = np.argmin(error)
        min_error = error[min_feature_num]
        pred_errs = features[min_feature_num]
        error_rate, threshold, parity = weak_learners[min_feature_num]

        # update the data weights (Viola-Jones rule: w_i <- w_i * beta^(1 - e_i),
        # where e_i = 1 if sample i was misclassified by this weak learner)
        beta = min_error / (1 - min_error)
        alpha = -np.log(beta)
        data_weights = data_weights * np.power(beta, np.logical_not(pred_errs))
        data_weights = data_weights / np.sum(data_weights)  # normalize the weights
        hist, bin_edges = np.histogram(data_weights, bins=10, range=(0, 1), density=True)
        print(hist)

        # ensure this feature will NOT be selected again: setting its error
        # indicators to 2 gives it the worst possible weighted error
        features[min_feature_num] = 2

        print('Iter #{:3d}'.format(i+1))
        print('-' * 30)
        print('Feature   = {: 06d}'.format(min_feature_num))
        print('W. Error  = {:0.6f}'.format(min_error))
        print('Error     = {:0.6f}'.format(error_rate))
        print('Threshold = {:0.6f}'.format(threshold))
        print('Beta      = {:0.6f}'.format(beta))
        print('Alpha     = {:0.6f}'.format(alpha))
        print('This round took %5.2f secs.' % (time.time() - round_start))
        print('-' * 30)
        print()

        # save this weak classifier
        best_weak_learners.append(TrainedFeatures(min_feature_num, min_error, error_rate,
                                                  threshold, parity, alpha))

    print('Finished Adaboost training in %5.2f secs.' % (time.time() - start))

    return best_weak_learners
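
feature_train records each selected learner as a TrainedFeatures object, whose definition is also not shown on this page. Judging by the six-way tuple unpacking in Examples #1 and #2, it is presumably a namedtuple along these lines:

from collections import namedtuple

# hypothetical definition; the field order matches the unpacking in analyze_features
TrainedFeatures = namedtuple('TrainedFeatures',
                             ['min_feature_num', 'min_error', 'error_rate',
                              'threshold', 'parity', 'alpha'])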
Example #4
            if key == 27:  # Esc aborts the whole inspection
                stop_inspection = True
                break
            if key == ord('n'):  # 'n' advances to the next image
                break

            cv2.imshow(win_name, img)

        if stop_inspection:
            break

    cv2.destroyAllWindows()


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='Feature Inspection')
    parser.add_argument('-features', nargs='+', type=int, default=[66214],
                        help='inspect features by their id number')
    parser.add_argument('-all', action='store_true',
                        help='inspect all features chosen by Adaboost')

    args = parser.parse_args()

    if not args.all and len(args.features):
        inspect_features(args.features)  # inspect one or more features by their ids

    if args.all:
        best_features = pickle_load('adaboost10.pkl')
        # best_features = pickle_load('best_weak_learners_10.pkl')
        inspect_best_features(best_features)
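
The inspection loop can also be driven without the command line; a small sketch, assuming inspect_features takes a list of feature ids as in the __main__ block above (press 'n' to advance and Esc to stop):

# 66214 is the parser's default feature id
inspect_features([66214])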