def make_summary(X_Fit, X, accuracy, accuracy_no, matrix, matrix_no):
    """Print a two-part colored report: the baseline run, then the selected run.

    The baseline section is printed in ``bcolors.YELLOW`` and the
    feature-selection section in ``bcolors.GREEN``; both list the same fields.

    Args:
        X_Fit: Feature matrix after selection; only ``shape[1]`` is read.
        X: Original feature matrix; only ``shape[1]`` is read.
        accuracy: Accuracy value shown in the "WITH SELECTION" section.
        accuracy_no: Accuracy value shown in the "NO SELECTION" section.
        matrix: Matrix handed to the metric helpers for the selected run.
        matrix_no: Matrix handed to the metric helpers for the baseline run.
    """

    def report(color, title, features, acc, conf_matrix):
        # One section of the report; every line is wrapped in the same color.
        def emit(text):
            print(color + text + bcolors.ENDC)

        emit(title)
        emit('NB_OF_FEATURES: ' + str(features.shape[1]))
        emit('ACCURACY: ' + str(acc))
        emit('PRECISION: ' + str(calculatePrecision(conf_matrix)))
        emit('FPT_TPR: ' + str(calculateFPR_TPR_TNR(conf_matrix)))
        emit('F1 SCORE: ' + str(calculateF1(conf_matrix)))
        emit('MATRIX: \n' + str(conf_matrix))

    report(bcolors.YELLOW, 'NO SELECTION: \n', X, accuracy_no, matrix_no)
    report(bcolors.GREEN, '\nWITH SELECTION: \n', X_Fit, accuracy, matrix)
def make_experiment(file, set, elements):
    """Score eight fixed subsets without selection, then try every method.

    The samples are sorted by label, eight deterministic subsets are cut from
    the sorted data, and each subset is scored with
    ``calculateF1(reverseMatrix(...))`` on the matrix returned by
    ``get_average_score``. The baseline line is written to ``file`` and
    ``make_closest`` is then invoked once per selection method.

    Args:
        file: Writable object; only ``write`` is called on it.
        set: Label forwarded to the summary helpers (presumably the data set
            name -- confirm against the caller).
        elements: Two-item sequence ``[X, y]`` of features and labels.
    """
    features, labels = elements

    # Sort rows by label so that the slices below are taken from ordered data.
    frame = pd.DataFrame(features)
    frame['y'] = labels
    frame.sort_values('y', inplace=True)
    print(frame)

    X = frame.drop('y', axis=1).values
    y = frame['y'].values
    n_rows = len(X)

    # Subset 0 is the full set; 1-5 are strided samples (every 2nd / 3rd row).
    subsets = [[X, y]]
    strides = (
        slice(1, None, 2),
        slice(0, None, 2),
        slice(0, None, 3),
        slice(1, None, 3),
        slice(2, None, 3),
    )
    for stride in strides:
        subsets.append([X[stride], y[stride]])

    # Subsets 6 and 7 are tails whose start index comes from get_part_of_set.
    subsets.append([
        X[get_part_of_set(X, 0.4):n_rows],
        y[get_part_of_set(y, 0.4):n_rows],
    ])
    subsets.append([
        X[get_part_of_set(X, 0.2):(n_rows - 2)],
        y[get_part_of_set(y, 0.2):(n_rows - 2)],
    ])

    # Baseline: one score per subset with all features kept.
    results = []
    for subset_X, subset_y in subsets:
        _, confusion = get_average_score(subset_X, subset_y)
        results.append(calculateF1(reverseMatrix(confusion)))

    n_features = X.shape[1]
    file.write(wicloxon_string_summary('NO SELECTION', set, n_features,
                                       'null', results))

    methods = ('ANOVA', 'RELIEF', 'INFORATION GAIN', 'CHI SQUARE',
               'CORRELATION COEF')
    for method in methods:
        make_closest(file, method, set, subsets, results, n_features)
def make_closest(file, method, set, subsets, results, set_len):
    """Find the smallest feature count whose Wilcoxon p-value exceeds ALPHA.

    Feature counts ``1 .. set_len - 1`` are tried in increasing order. For
    each count every subset is reduced with ``get_method`` and scored, and the
    per-subset scores are compared against ``results`` via
    ``calculateWilcoxon``. The first count with ``p > ALPHA`` is written to
    ``file``; when none qualifies, the baseline ``results`` are written with
    ``set_len`` features and a p-value of 0.

    Args:
        file: Writable object; only ``write`` is called on it.
        method: Selection method name forwarded to ``get_method``.
        set: Label forwarded to the summary helpers.
        subsets: Sequence of ``[X, y]`` pairs to evaluate.
        results: Baseline per-subset scores to compare against.
        set_len: Number of features in the unreduced data.
    """
    for n_feats in range(1, set_len):
        candidate_scores = []
        for subset in subsets:
            X_Fit, scores = get_method(method, subset, n_feats)
            _, confusion = get_average_score(X_Fit, subset[1])
            candidate_scores.append(calculateF1(reverseMatrix(confusion)))

        try:
            _, p = calculateWilcoxon(results, candidate_scores)
        except ValueError:
            # A ValueError from the test is treated as "no difference"
            # (presumably raised for identical samples -- confirm).
            p = 1

        n_selected = X_Fit.shape[1]
        make_wilcoxon_summary(method, set, n_selected, p, results,
                              candidate_scores)

        if p > ALPHA:
            print('FOUND - ORIGINAL NB: ' + str(set_len) + ' - NEW NB: '
                  + str(n_feats) + '\n')
            file.write(wicloxon_string_summary(method, set, n_selected, p,
                                               candidate_scores))
            break
    else:
        # No reduced feature count qualified: fall back to the baseline.
        print('NOT FOUND - ROLLBACK\n')
        file.write(wicloxon_string_summary(method, set, set_len, 0, results))
def get_string_summary(method, set, X, accuracy, matrix, scores):
    """Build one ``'; '``-separated, newline-terminated result line.

    Field order: set, method, ``X.shape[1]``, ``X.shape[0]``, accuracy, then
    the outputs of ``calculateBalancedAcc``, ``calculatePrecision``,
    ``calculateRecall``, ``calculateF1`` and ``calculateFPR_TPR_TNR``, the
    matrix itself and finally ``scores``. Spaces are stripped from the last
    three fields, and newlines from the matrix.

    Args:
        method: Method name; must be a string.
        set: Set label; must be a string.
        X: Feature matrix; only ``shape`` is read.
        accuracy: Accuracy value to record.
        matrix: Matrix passed to the metric helpers and serialized as text.
        scores: Scores object, serialized with ``str``.

    Returns:
        The formatted line, ending in ``'\\n'``.
    """
    rates_text = str(calculateFPR_TPR_TNR(matrix)).replace(' ', '')
    matrix_text = array2string(matrix, separator=',')
    matrix_text = matrix_text.replace('\n', '').replace(' ', '')
    scores_text = str(scores).replace(' ', '')

    fields = [
        set,
        method,
        str(X.shape[1]),
        str(X.shape[0]),
        str(accuracy),
        str(calculateBalancedAcc(matrix)),
        str(calculatePrecision(matrix)),
        str(calculateRecall(matrix)),
        str(calculateF1(matrix)),
        rates_text,
        matrix_text,
        scores_text,
    ]
    return '; '.join(fields) + '\n'
def make_experiment(file, set, elements):
    """Record the no-selection baseline, then run every method for ``MODE``.

    The baseline is scored on all features, printed and written to ``file``.
    Depending on the module-level ``MODE``, each selection method is then run
    through ``make_closest`` (``'closest'``) or ``make_best`` (``'best'``);
    any other ``MODE`` value runs no method.

    Args:
        file: Writable object; only ``write`` is called on it here.
        set: Label forwarded to the summary helpers.
        elements: Two-item sequence ``[X, y]`` of features and labels.
    """
    X, y = elements

    accuracy, confusion = get_average_score(X, y)
    flipped = reverseMatrix(confusion)

    make_simple_summary('NO SELECTION', set, X, accuracy, flipped, ['all'])
    baseline_line = get_string_summary('NO SELECTION', set, X, accuracy,
                                       flipped, ['all'])
    file.write(baseline_line)

    # Reference F1 that make_closest compares reduced feature sets against.
    baseline_f1 = calculateF1(flipped)

    methods = ('ANOVA', 'RELIEF', 'INFORATION GAIN', 'CHI SQUARE',
               'CORRELATION COEF')
    if MODE == 'closest':
        for name in methods:
            make_closest(file, name, set, elements, baseline_f1)
    elif MODE == 'best':
        for name in methods:
            make_best(file, name, set, elements)
def make_simple_summary(method, set, X, accuracy, matrix, scores):
    """Print a colored, one-field-per-line summary of a single run.

    Set and method are printed in ``bcolors.YELLOW``, the F1 line in
    ``bcolors.BOLD`` and every other field in ``bcolors.GREEN``. A final
    ``print('\\n')`` separates consecutive summaries.

    Args:
        method: Method name shown on the "METHOD" line.
        set: Label shown on the "SET" line.
        X: Feature matrix; only ``shape`` is read.
        accuracy: Accuracy value to display.
        matrix: Matrix passed to the metric helpers and displayed.
        scores: Scores object, displayed with ``str``.
    """

    def emit(color, label, value):
        print(color + label + str(value) + bcolors.ENDC)

    emit(bcolors.YELLOW, 'SET: ', set)
    emit(bcolors.YELLOW, 'METHOD: ', method)
    emit(bcolors.GREEN, 'NB_OF_FEATURES: ', X.shape[1])
    emit(bcolors.GREEN, 'NB_OF_ELEMENTS: ', X.shape[0])
    emit(bcolors.GREEN, 'ACCURACY: ', accuracy)
    emit(bcolors.GREEN, 'BALANCED ACC: ', calculateBalancedAcc(matrix))
    emit(bcolors.GREEN, 'PRECISION: ', calculatePrecision(matrix))
    emit(bcolors.GREEN, 'RECALL: ', calculateRecall(matrix))
    emit(bcolors.BOLD, 'F1 SCORE: ', calculateF1(matrix))
    emit(bcolors.GREEN, 'TPR FPR TNR: ', calculateFPR_TPR_TNR(matrix))
    # The matrix is flattened onto one line for display.
    emit(bcolors.GREEN, 'MATRIX: ', str(matrix).replace('\n', ''))
    emit(bcolors.GREEN, 'SCORES: ', scores)
    print('\n')
def make_best(file, method, set, elements):
    """Search all feature counts and record the one with the highest F1.

    Every count from 1 to ``X.shape[1]`` is evaluated with ``get_method`` and
    ``get_average_score``; each evaluation is printed via
    ``make_simple_summary``. The count with the strictly highest
    ``calculateF1`` value (the earliest one on ties) is then evaluated once
    more, printed, and written to ``file``. If no count yields an F1 above 0,
    a single feature is used.

    Args:
        file: Writable object; only ``write`` is called on it.
        method: Selection method name forwarded to ``get_method``.
        set: Label forwarded to the summary helpers.
        elements: Two-item sequence ``[X, y]`` of features and labels.
    """
    X, y = elements
    best_num_of_feats = 1
    best_f1 = 0

    for feats in range(1, X.shape[1] + 1):
        X_Fit, scores = get_method(method, elements, feats)
        accuracy, matrix = get_average_score(X_Fit, y)
        matrix_rev = reverseMatrix(matrix)
        new_f1 = calculateF1(matrix_rev)
        make_simple_summary(method, set, X_Fit, accuracy, matrix_rev, scores)
        # Strict comparison: on equal F1 the smaller feature count is kept.
        if new_f1 > best_f1:
            best_f1 = new_f1
            best_num_of_feats = feats

    # Report the winning count, not the loop variable: after the loop `feats`
    # holds the last count tried (and is unbound when X has no columns).
    print('\nBEST FOR: ' + str(best_num_of_feats) + '\n')

    # Re-run the winning configuration to produce the recorded result.
    X_Fit, scores = get_method(method, elements, best_num_of_feats)
    accuracy, matrix = get_average_score(X_Fit, y)
    matrix_rev = reverseMatrix(matrix)
    make_simple_summary(method, set, X_Fit, accuracy, matrix_rev, scores)
    file.write(get_string_summary(method, set, X_Fit, accuracy, matrix_rev,
                                  scores))
def make_closest(file, method, set, elements, basic_f1):
    """Find the smallest feature count whose F1 is within ALPHA of baseline.

    Feature counts ``1 .. X.shape[1] - 1`` are tried in increasing order and
    each evaluation is printed via ``make_simple_summary``. The first count
    for which ``abs(F1 - basic_f1) < ALPHA`` is written to ``file``. When none
    qualifies, the method is re-run with all ``X.shape[1]`` features and that
    result is written instead.

    Args:
        file: Writable object; only ``write`` is called on it.
        method: Selection method name forwarded to ``get_method``.
        set: Label forwarded to the summary helpers.
        elements: Two-item sequence ``[X, y]`` of features and labels.
        basic_f1: Baseline F1 value to stay close to.
    """
    X, y = elements
    total_feats = X.shape[1]

    for n_feats in range(1, total_feats):
        X_Fit, scores = get_method(method, elements, n_feats)
        accuracy, confusion = get_average_score(X_Fit, y)
        flipped = reverseMatrix(confusion)
        gap = abs(calculateF1(flipped) - basic_f1)
        make_simple_summary(method, set, X_Fit, accuracy, flipped, scores)

        if gap < ALPHA:
            print('FOUND - ORIGINAL NB: ' + str(total_feats) + ' - NEW NB: '
                  + str(X_Fit.shape[1]) + '\n')
            file.write(get_string_summary(method, set, X_Fit, accuracy,
                                          flipped, scores))
            break
    else:
        # No reduced count was close enough: record the full-feature run.
        print('NOT FOUND - ROLLBACK\n')
        X_Fit, scores = get_method(method, elements, total_feats)
        accuracy, confusion = get_average_score(X_Fit, y)
        flipped = reverseMatrix(confusion)
        file.write(get_string_summary(method, set, X_Fit, accuracy, flipped,
                                      scores))