import os
import time

import numpy as np
from sklearn.metrics import confusion_matrix


def analyze_features(best_features='best_weak_learners_200.pkl',
                     features_file='all_predict_features.pkl',
                     num_pos=4916, num_neg=7960, top=None):
    """
    :param best_features: file containing a list of the best classifiers selected by Adaboost
    :param features_file: file containing a matrix of predicted features
    :param num_pos: number of positive images in the training set
    :param num_neg: number of negative images in the training set
    :param top: evaluate only the first `top` selected weak learners (defaults to all of them)
    :return: arrays of the accuracy and false positive rate of the combined
             classifier after each boosting round
    """
    best_features = pickle_load(best_features)
    predict_features = pickle_load(features_file)
    sum_of_alphas = 0
    n_estimators = len(best_features)

    # the first num_pos samples are the positive images
    labels = np.zeros(num_pos + num_neg, dtype=np.int8)
    labels[:num_pos] = 1

    if top is None:
        top = n_estimators
    accuracy_rates = np.empty(top, dtype=np.float32)
    false_positive_rates = np.empty(top, dtype=np.float32)

    for i, estimator in enumerate(best_features[:top]):
        min_feature_num, min_error, error_rate, threshold, parity, alpha = estimator
        sum_of_alphas += alpha
        pred_features = np.ravel(predict_features[min_feature_num].todense())
        # accumulate the alpha-weighted votes of the weak learners
        if i == 0:
            combined_threshold = pred_features * alpha
        else:
            combined_threshold += pred_features * alpha
        # strong classifier decision rule: predict positive when the weighted
        # vote exceeds half of the total alpha mass
        pred = combined_threshold > sum_of_alphas / 2
        cmat = confusion_matrix(labels, pred)
        # compute the false positive rate of the current final classifier
        # (using all features selected by Adaboost up to i+1); with sklearn's
        # layout, cmat[0, 1] counts false positives and cmat[0, 0] true negatives
        false_positive_rates[i] = cmat[0, 1] / float(cmat[0, 0] + cmat[0, 1])
        # compute the accuracy of the final classifier
        # (using all features selected by Adaboost up to i+1)
        accuracy_rates[i] = (cmat[0, 0] + cmat[1, 1]) / float(len(pred))
    return accuracy_rates, false_positive_rates
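# A sketch of how analyze_features might be used to visualize how the strong
# classifier improves as weak learners are added; matplotlib is an assumed
# extra dependency here and is not used elsewhere in this code.
def plot_feature_analysis(top=None):
    import matplotlib.pyplot as plt
    accuracy_rates, false_positive_rates = analyze_features(top=top)
    rounds = np.arange(1, len(accuracy_rates) + 1)
    plt.plot(rounds, accuracy_rates, label='accuracy')
    plt.plot(rounds, false_positive_rates, label='false positive rate')
    plt.xlabel('number of weak learners')
    plt.ylabel('rate')
    plt.legend()
    plt.show()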
def feature_train(data_path, weak_learner_file='all_weak_learners.pkl',
                  features_file='predict_error.pkl',
                  num_pos=4916, num_neg=7960, n_estimators=200):
    start = time.time()
    print 'Starting Adaboost training ...'

    parent_dir = os.getcwd()
    os.chdir(data_path)
    weak_learners = pickle_load(weak_learner_file)
    features = pickle_load(features_file)
    os.chdir(parent_dir)

    # set up the initial weights: each class gets half of the total weight,
    # spread uniformly over its samples (note the float literals -- integer
    # division would zero the weights under Python 2)
    data_weights = np.empty(num_pos + num_neg, dtype=np.float32)
    data_weights[:num_pos] = 1.0 / (2 * num_pos)
    data_weights[num_pos:] = 1.0 / (2 * num_neg)
    labels = np.zeros(num_pos + num_neg, dtype=np.int8)
    labels[:num_pos] = 1

    best_weak_learners = []
    for i in xrange(n_estimators):
        round_start = time.time()
        # each row of `features` holds the 0/1 prediction errors of one weak
        # learner, so this dot product gives every learner's weighted error;
        # select the best weak learner as the one with the minimum error
        error = np.dot(features, data_weights)
        min_feature_num = np.argmin(error)
        min_error = error[min_feature_num]
        pred_errs = features[min_feature_num]
        error_rate, threshold, parity = weak_learners[min_feature_num]

        # update the data weights: w_i <- w_i * beta^(1 - e_i), i.e. correctly
        # classified samples (e_i = 0) are down-weighted by beta
        beta = min_error / (1 - min_error)
        alpha = -np.log(beta)
        data_weights = data_weights * np.power(beta, np.logical_not(pred_errs))
        data_weights = data_weights / np.sum(data_weights)  # normalize the weights

        hist, bin_edges = np.histogram(data_weights, bins=10, range=(0, 1), density=True)
        print hist

        # ensure the feature selected will NOT be selected again by giving it
        # the worst possible weighted error in all later rounds
        features[min_feature_num] = 2

        print 'Iter #{:3d}'.format(i + 1)
        print '-' * 30
        print 'Feature   = {:06d}'.format(min_feature_num)
        print 'W. Error  = {:0.6f}'.format(min_error)
        print 'Error     = {:0.6f}'.format(error_rate)
        print 'Threshold = {:0.6f}'.format(threshold)
        print 'Beta      = {:0.6f}'.format(beta)
        print 'Alpha     = {:0.6f}'.format(alpha)
        print 'This round took %5.2f secs.' % (time.time() - round_start)
        print '-' * 30
        print

        # save this weak classifier
        best_weak_learners.append(TrainedFeatures(min_feature_num, min_error,
                                                  error_rate, threshold,
                                                  parity, alpha))

    print 'Finished Adaboost training in %5.2f secs.' % (time.time() - start)
    return best_weak_learners
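# pickle_load and TrainedFeatures are assumed to be defined elsewhere in the
# project and are not shown in this section. A minimal sketch consistent with
# how they are used here: a thin pickle wrapper, and a namedtuple holding the
# six values saved for each weak learner selected by Adaboost.
import pickle
from collections import namedtuple

TrainedFeatures = namedtuple(
    'TrainedFeatures',
    ['min_feature_num', 'min_error', 'error_rate', 'threshold', 'parity', 'alpha'])


def pickle_load(file_name):
    with open(file_name, 'rb') as f:
        return pickle.load(f)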
            if key == 27:  # Esc stops the whole inspection
                stop_inspection = True
                break
            if key == ord('n'):  # 'n' advances to the next feature
                break
            cv2.imshow(win_name, img)
        if stop_inspection:
            break
    cv2.destroyAllWindows()


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='Feature Inspection')
    parser.add_argument('-features', nargs='+', type=int, default=[66214],
                        help='inspect features by their id number')
    parser.add_argument('-all', action='store_true',
                        help='inspect all features chosen by Adaboost')
    args = parser.parse_args()

    if not args.all and len(args.features):
        # inspect an individual feature or a set of features given their ids
        inspect_features(args.features)
    if args.all:
        best_features = pickle_load('adaboost10.pkl')
        # best_features = pickle_load('best_weak_learners_10.pkl')
        inspect_best_features(best_features)
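# Example invocations (the script file name inspect_features.py is assumed;
# the flags come from the argparse definition above):
#
#   python inspect_features.py -features 66214    # inspect a specific feature id
#   python inspect_features.py -all               # step through every Adaboost-selected feature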