def get_train_data(perc, freq):
    """Announce the feature-selection percentile and load the training set.

    Args:
        perc: Percentile forwarded to ``load_train_data`` as ``percentile``;
            also printed with ``%d`` formatting.
        freq: Forwarded unchanged to ``load_train_data`` as ``freq``.

    Returns:
        Whatever ``load_train_data`` returns for ``./data/train.txt`` with
        ``word_count_threshold=5`` and feature selection switched on.
    """
    print("Feature selection percentile: %d" % perc)
    return load_train_data(
        "./data/train.txt",
        word_count_threshold=5,
        freq=freq,
        feature_selection_flag=True,
        percentile=perc,
    )
def test_bnbc():
    """Run ``bnbc_func`` once per configured feature-selection percentile.

    For each percentile the training set is reloaded from
    ``./data/train.txt`` (``word_count_threshold=2``, ``freq=False``,
    feature selection enabled) and its ``"x"`` / ``"y"`` entries are handed
    to ``bnbc_func``.
    """
    print_title("Binomial NBC")

    # Only the 40th percentile is exercised at the moment; a full sweep
    # would use 10, 20, ..., 100 instead.
    percentiles = (40,)

    for percentile in percentiles:
        print("=" * 20)
        print("Feature selection percentile: %d" % percentile)
        dataset = load_train_data(
            "./data/train.txt",
            word_count_threshold=2,
            freq=False,
            feature_selection_flag=True,
            percentile=percentile,
        )
        features, labels = dataset["x"], dataset["y"]
        bnbc_func(features, labels)
def test_entropy():
    """Run ``entropy_func`` across feature-selection percentiles 10..100.

    For each percentile (in steps of 10) the training set is reloaded from
    ``./data/train.txt`` (``word_count_threshold=1``, ``freq=False``,
    feature selection enabled) and its ``'x'`` / ``'y'`` entries are handed
    to ``entropy_func``.
    """
    print_title("Max-Entropy Classifier")

    # Full sweep 10, 20, ..., 100. For a quick single run, iterate over
    # just 40 instead.
    for percentile in range(10, 101, 10):
        print("=" * 20)
        print("Feature selection percentile: %d" % percentile)
        dataset = load_train_data(
            "./data/train.txt",
            word_count_threshold=1,
            freq=False,
            feature_selection_flag=True,
            percentile=percentile,
        )
        features, labels = dataset['x'], dataset['y']
        entropy_func(features, labels)
def test_svm(kernel):
    """Run ``svm_func`` with the given kernel across percentiles 10..100.

    For each percentile (in steps of 10) the training set is reloaded from
    ``./data/train.txt`` (``word_count_threshold=1``, ``freq=False``,
    feature selection enabled) and its ``'x'`` / ``'y'`` entries are handed
    to ``svm_func`` together with ``kernel``.

    Args:
        kernel: Forwarded to ``svm_func`` as ``kernel`` and echoed in the
            per-iteration banner.
    """
    freq = False
    print_title("SVM Classifier")

    # Full sweep 10, 20, ..., 100. For a quick single run, iterate over
    # just 40 instead.
    for percentile in range(10, 101, 10):
        # ``%`` binds tighter than ``+``, so only the second literal is
        # formatted before being appended to the separator.
        separator = "=" * 20
        print(separator + "\nkernel: %s || freq: %s" % (kernel, freq))
        print("Feature selection percentile: %d" % percentile)
        dataset = load_train_data(
            "./data/train.txt",
            word_count_threshold=1,
            freq=freq,
            feature_selection_flag=True,
            percentile=percentile,
        )
        features, labels = dataset['x'], dataset['y']
        svm_func(features, labels, kernel=kernel)