def run_trepan(X_train, X_test, y_train, y_test, discrete_list, dataset_par, model):
    """Extract rules from `model` with TREPAN and report quality metrics.

    Builds a TREPAN tree over the training data via an Oracle wrapper,
    derives a rule list from the tree, then scores the rules on the test
    set (correctness, fidelity to the network, robustness under
    perturbation, rule count/length, class coverage).

    Returns a list:
    [completeness, correctness, fidelity, robustness, n_rules,
     avg_rule_length, overlap_fraction, class_fraction]
    (completeness is fixed at 1 and overlap at 0 for TREPAN).
    """
    X_train, X_test = X_train.to_numpy(), X_test.to_numpy()
    n_class = dataset_par['classes']

    # Build the TREPAN tree, querying the network through the oracle.
    oracle = Oracle(model, n_class, X_train, discrete_list)
    tree_obj = Tree(oracle)
    root = tree_obj.build_tree()
    tree_obj.assign_levels(root, 0)
    # tree_obj.print_tree_levels(root)

    final_labels = tree_obj.leaf_values(root)
    print(final_labels)
    tree_obj.print_tree_rule(root)
    final_rules = tree_obj.rule_list(root)
    print(final_rules)

    # Score the extracted rules on the test set and on perturbed copies.
    n_examples = X_test.shape[0]
    predi_torch = np.argmax(model.predict(X_test), axis=1)
    perturbed_data = perturbator(X_test)
    rule_labels = [tree_obj.predict(X_test[idx, :], root) for idx in range(n_examples)]
    perturbed_labels = [tree_obj.predict(perturbed_data[idx, :], root) for idx in range(n_examples)]

    rule_write('TREPAN_', final_rules, dataset_par)

    correctness = accuracy_score(y_test, rule_labels)
    fidelity = accuracy_score(predi_torch, rule_labels)
    robustness = accuracy_score(rule_labels, perturbed_labels)
    rule_n = len(final_rules)
    # Rule length = total number of antecedent entries across its terms.
    avg_length = sum(len(term['n']) for rule in final_rules for term in rule) / rule_n
    class_fraction = len(set(final_labels)) / n_class

    print("Completeness of the ruleset is: " + str(1))
    print("Correctness of the ruleset is: " + str(correctness))
    print("Fidelity of the ruleset is: " + str(fidelity))
    print("Robustness of the ruleset is: " + str(robustness))
    print("Number of rules : " + str(rule_n))
    print("Average rule length: " + str(avg_length))
    print("Fraction overlap: " + str(0))
    print("Fraction of classes: " + str(class_fraction))
    return [1, correctness, fidelity, robustness, rule_n, avg_length, 0, class_fraction]
def refne_run(X_train, X_test, y_test, discrete_attributes, continuous_attributes, dataset_par, model, save_graph):
    """Extract rules with REFNE (Rule Extraction From Neural network Ensembles).

    Generates synthetic samples labelled by the network, discretizes the
    continuous attributes, mines rules over attribute combinations, then
    evaluates the ruleset on the test data.

    Note: mutates `X_train` in place (continuous columns are discretized).

    Returns the metric list produced by `rule_metrics_calculator`; when
    `save_graph` is true, also writes the attack list and final rules to
    disk.
    """
    label_col = dataset_par['output_name']
    discrete_attributes = column_translator(X_train, label_col, discrete_attributes)
    continuous_attributes = column_translator(X_train, label_col, continuous_attributes)
    all_column_combos = column_combos(categorical_var=discrete_attributes, continuous_var=continuous_attributes)

    # Double the training data with synthetic samples labelled by the model.
    synth_samples = X_train.shape[0]
    xSynth = synthetic_data_generator(X_train, synth_samples)
    # Fix: DataFrame.append was removed in pandas 2.0 — use pd.concat instead.
    xSynth = pd.concat([xSynth, X_train], ignore_index=True)
    ySynth = np.argmax(model.predict(xSynth), axis=1)
    n_class = dataset_par['classes']

    # Discretize the continuous attributes; discrete ones get degenerate
    # [v, v] intervals so every attribute has an entry in interv_dict.
    attr_list = xSynth.columns.tolist()
    xSynth[label_col] = ySynth
    interv_dict = {}
    for attr in attr_list:
        if attr in continuous_attributes:
            interv = interval_definer(data=xSynth, attr=attr, label=label_col)
            xSynth[attr] = discretizer(xSynth[attr], interv)
            X_train[attr] = discretizer(X_train[attr], interv)
            interv_dict[attr] = interv
        else:
            unique_values = np.unique(xSynth[attr]).tolist()
            interv_dict[attr] = [list(a) for a in zip(unique_values, unique_values)]

    final_rules = rule_maker(xSynth, interv_dict, all_column_combos, label_col, model)

    # Evaluate the ruleset against the model's own test-set predictions.
    predicted_labels = np.argmax(model.predict(X_test), axis=1)
    metrics = rule_metrics_calculator(X_test, y_test, predicted_labels, final_rules, n_class)
    rule_write('REFNE_', final_rules, dataset_par)

    if save_graph:
        attack_list, final_rules = attack_definer(X_test, final_rules, merge_rules=True)
        create_empty_file('REFNE_' + dataset_par['dataset'] + "_attack_list")
        save_list(attack_list, 'REFNE_' + dataset_par['dataset'] + "_attack_list")
        create_empty_file('REFNE_' + dataset_par['dataset'] + "_final_rules")
        save_list(final_rules, 'REFNE_' + dataset_par['dataset'] + "_final_rules")
    return metrics
def run_c45_pane(X_train, X_test, y_test, dataset_par, model, labels):
    """Extract a C4.5-style decision tree (PANE) from `model` and score it.

    Fits a tree on model-labelled training data, then compares the tree's
    test-set predictions against the true labels, the network's labels,
    and the tree's own predictions on perturbed inputs.

    Returns the metric list produced by `metric_calculator`.
    """
    print(labels)
    x_tot, y_tot, clf = create_tree(X_train, model)
    # rules = export_text(clf)
    # Showing the rules
    # print_decision_tree(clf)

    n_examples = X_test.shape[0]
    tree_labels = clf.predict(X_test)
    net_labels = np.argmax(model.predict(X_test), axis=1)
    noisy_labels = clf.predict(perturbator(X_test))
    leaf_depths = get_node_depths(clf.tree_)

    rule_write('C45_', clf, dataset_par)
    return metric_calculator(tree_labels, y_test, net_labels, noisy_labels,
                             leaf_depths, len(labels), n_examples)
def rxren_run(X_train, X_test, y_train, y_test, dataset_par, model, save_graph):
    """Extract rules with RxREN (rule extraction via reverse network pruning).

    Prunes insignificant input neurons, derives per-class value limits
    from the misclassification behaviour of the pruned network, prunes
    and iteratively simplifies the resulting rules, then evaluates them
    on the test set.

    Returns the metric list produced by `rule_metrics_calculator`; when
    `save_graph` is true, also writes the attack list and final rules to
    disk.
    """
    y = np.concatenate((y_train, y_test), axis=0)
    column_lst = X_train.columns.tolist()
    column_dict = {i: column_lst[i] for i in range(len(column_lst))}
    X_train, X_test = X_train.to_numpy(), X_test.to_numpy()
    n_class = dataset_par['classes']

    # Model predictions on the test set, used for the final metrics.
    # Fix: the original ran model.predict(X_test) + argmax three times
    # (predicted_labels, test_pred, y_test_predicted) with identical
    # results — compute it once and reuse it.
    predicted_labels = np.argmax(model.predict(X_test), axis=1)

    # model = load_model(MODEL_NAME)
    weights = np.array(model.get_weights())
    results = np.argmax(model.predict(X_train), axis=1)
    # Fix: the correctness mask was built twice; build it once.
    correct_mask = [results[i] == y_train[i] for i in range(len(y_train))]
    correctX = X_train[correct_mask]
    print('Number of correctly classified examples', correctX.shape)
    correcty = y_train[correct_mask]
    acc = accuracy_score(results, y_train)
    print("Accuracy of original model on the train dataset: ", acc)
    test_acc = accuracy_score(predicted_labels, y_test)
    print("Accuracy of original model on the test dataset: ", test_acc)

    miss_list, ins_index, new_accuracy, err = network_pruning(
        weights, correctX, correcty, X_test, y_test, test_acc, in_item=dataset_par)
    # Set for O(1) membership instead of list scans per input neuron.
    insignificant = set(ins_index)
    significant_index = [i for i in range(weights[0].shape[0]) if i not in insignificant]
    significant_columns = {i: v for i, v in column_dict.items() if i in significant_index}
    print("Accuracy of pruned network", new_accuracy)

    rule_limits = rule_limits_calculator(correctX, correcty, miss_list, significant_index, err, alpha=0.5)
    rule_limits = rule_formatter(rule_combiner(rule_limits))
    rule_limits, rule_acc = rule_pruning(X_test, y_test, rule_limits, n_class)

    # Greedy simplification: keep accepting the simplified rules while
    # the summed per-class accuracy improves.
    rule_simplifier = True
    while rule_simplifier:
        new_rule_acc, rule_limits = rule_evaluator(X_test, predicted_labels, rule_limits,
                                                   rule_acc, np.unique(y))
        if sum(new_rule_acc.values()) > sum(rule_acc.values()):
            rule_acc = new_rule_acc
        else:
            rule_simplifier = False

    final_rules = rule_sorter(rule_limits, X_test, significant_columns)

    X_test, _ = input_delete(ins_index, X_test)
    X_test = pd.DataFrame(X_test, columns=significant_columns.values())
    metrics = rule_metrics_calculator(X_test, y_test, predicted_labels, final_rules, n_class)
    rule_write('RxREN_', final_rules, dataset_par)

    if save_graph:
        attack_list, final_rules = attack_definer(X_test, final_rules)
        create_empty_file('RxREN_' + dataset_par['dataset'] + "_attack_list")
        save_list(attack_list, 'RxREN_' + dataset_par['dataset'] + "_attack_list")
        create_empty_file('RxREN_' + dataset_par['dataset'] + "_final_rules")
        save_list(final_rules, 'RxREN_' + dataset_par['dataset'] + "_final_rules")
    return metrics
def rxncn_run(X_train, X_test, y_train, y_test, dataset_par, model, save_graph):
    """Extract rules with RxNCM (rule extraction from correctly/misclassified examples).

    Splits off a validation set, prunes insignificant input neurons,
    derives rule limits from both correctly and incorrectly classified
    examples, prunes and iteratively simplifies the rules, then scores
    them on the union of train/test/validation data.

    Returns the metric list produced by `rule_metrics_calculator`, or a
    list of eight zeros when no rules could be derived; when `save_graph`
    is true, also writes the attack list and final rules to disk.
    """
    # Alpha is set equal to the percentage of input instances belonging to the least-represented class in the dataset
    alpha = 0.1
    n_class = dataset_par['classes']
    X_test, X_val, y_test, y_val = train_test_split(X_test, y_test, test_size=0.33)
    print(X_train.columns)
    column_lst = X_train.columns.tolist()
    column_dict = {i: column_lst[i] for i in range(len(column_lst))}
    # NOTE(review): y deliberately(?) excludes y_val — used only for np.unique below; confirm.
    y = np.concatenate((y_train, y_test), axis=0)
    X_train, X_test, X_val = X_train.to_numpy(), X_test.to_numpy(), X_val.to_numpy()
    weights = np.array(model.get_weights())
    # Fix: Sequential.predict_classes was removed in TF/Keras >= 2.6; use the
    # same prediction_reshape(model.predict(...)) idiom as the rest of this
    # function.
    results = prediction_reshape(model.predict(X_train))

    # Model predictions over all data, used for the final metrics.
    predicted_labels = prediction_reshape(model.predict(np.concatenate([X_train, X_test, X_val], axis=0)))

    # Fix: the correctness mask was built twice; build it once.
    correct_mask = [results[i] == y_train[i] for i in range(len(y_train))]
    correctX = X_train[correct_mask]
    print('Number of correctly classified examples', correctX.shape)
    correcty = y_train[correct_mask]
    acc = accuracy_score(results, y_train)
    print("Accuracy of original model on the train dataset: ", acc)
    test_pred = prediction_reshape(model.predict(X_val))
    test_acc = accuracy_score(test_pred, y_val)
    print("Accuracy of original model on the validation dataset: ", test_acc)

    miss_dict, pruned_x, pruned_w, err, sig_cols = network_pruning(
        weights, correctX, correcty, X_val, y_val, test_acc, column_dict, in_item=dataset_par)
    correct_dict = correct_examples_finder(pruned_x, correcty, dataset_par, sig_cols, in_weight=pruned_w)
    final_dict = combine_dict_list(miss_dict, correct_dict)

    rule_limits = rule_limits_calculator(pruned_x, correcty, final_dict, sig_cols, alpha=alpha)
    rule_limits = rule_formatter(rule_limits)

    if len(rule_limits) > 0:
        # Drop the columns of the pruned (insignificant) input neurons.
        insignificant_neurons = [key for key, value in column_dict.items()
                                 if value not in list(sig_cols.values())]
        X_test, _ = input_delete(insignificant_neurons, X_test)
        X_train, _ = input_delete(insignificant_neurons, X_train)
        X_val, _ = input_delete(insignificant_neurons, X_val)
        X_tot = np.concatenate([X_train, X_test, X_val], axis=0)
        y_tot = np.concatenate([y_train, y_test, y_val], axis=0)

        rule_limits, rule_accuracy = rule_pruning(X_val, y_val, rule_limits, n_class)
        final_rules = rule_sorter(rule_limits, X_test, sig_cols)

        y_val_predicted = model_pruned_prediction([], X_val, dataset_par, in_weight=pruned_w)
        X_val = pd.DataFrame(X_val, columns=sig_cols.values())

        # Greedy simplification: keep accepting the simplified rules while
        # the summed per-class accuracy improves.
        rule_simplifier = True
        while rule_simplifier:
            new_rule_acc, final_rules = rule_evaluator(X_val, y_val_predicted, final_rules,
                                                       rule_accuracy, np.unique(y))
            if sum(new_rule_acc.values()) > sum(rule_accuracy.values()):
                rule_accuracy = new_rule_acc
            else:
                rule_simplifier = False

        X_tot = pd.DataFrame(X_tot, columns=sig_cols.values())
        # print(final_rules)
        metrics = rule_metrics_calculator(X_tot, y_tot, predicted_labels, final_rules, n_class)
        rule_write('RxNCM_', final_rules, dataset_par)

        if save_graph:
            attack_list, final_rules = attack_definer(X_tot, final_rules)
            create_empty_file('RxNCM_' + dataset_par['dataset'] + "_attack_list")
            save_list(attack_list, 'RxNCM_' + dataset_par['dataset'] + "_attack_list")
            create_empty_file('RxNCM_' + dataset_par['dataset'] + "_final_rules")
            save_list(final_rules, 'RxNCM_' + dataset_par['dataset'] + "_final_rules")
        return metrics
    else:
        return np.zeros(8).tolist()