def test_default_rule_correct(self):
    cba = CBA(support=0.9)
    cba_m2 = CBA(support=0.9)

    header1 = ["A", "B", "Y"]
    rows1 = [
        [1, 1, 0],
        [0, 0, 1],
    ]

    transactions = TransactionDB(rows1, header1)

    cba.fit(transactions)
    cba_m2.fit(transactions)

    default_class = cba.clf.default_class
    default_class_m2 = cba_m2.clf.default_class

    self.assertTrue(default_class in ["0", "1"])
    self.assertTrue(default_class_m2 in ["0", "1"])

    default_class_support = cba.clf.default_class_support
    default_class_confidence = cba.clf.default_class_confidence
    default_class_support_m2 = cba_m2.clf.default_class_support
    default_class_confidence_m2 = cba_m2.clf.default_class_confidence

    self.assertTrue(0 <= default_class_support <= 1)
    self.assertTrue(0 <= default_class_support_m2 <= 1)
    self.assertTrue(0 <= default_class_confidence <= 1)
    self.assertTrue(0 <= default_class_confidence_m2 <= 1)
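# Illustration only (not part of the test above): at support=0.9 no itemset in the
# two-row toy dataset is frequent, so CBA falls back to a default rule. The snippet
# below shows how a majority-class fallback and its support could be computed by
# hand for that data; it is a plain-Python sketch, not pyarc's internal implementation.
from collections import Counter

toy_labels = ["0", "1"]                                # class column Y of rows1
fallback_class, fallback_count = Counter(toy_labels).most_common(1)[0]
fallback_support = fallback_count / len(toy_labels)    # 0.5 here; the classes tie, so either is valid

print(fallback_class, fallback_support)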
def fmax(param_dict):
    print(param_dict)

    # The optimizer works on integer parameters; scale them back into (0, 1).
    support, confidence = param_dict["support"] / 1000, param_dict["confidence"] / 1000
    print(dict(support=support, confidence=confidence))

    # Mine class association rules with CBA, then wrap them in an IDS classifier.
    cba = CBA(support=support, confidence=confidence)
    cba.fit(txns)
    cba_clf = cba.clf

    ids = IDS()
    ids_clf = IDSClassifier(IDSRuleSet.from_cba_rules(cba_clf.rules).ruleset)
    ids_clf.quant_dataframe_train = quant_df
    ids_clf.calculate_default_class()
    ids.clf = ids_clf

    # Penalize solutions that violate the interpretability constraints;
    # otherwise maximize test-set AUC.
    metrics = ids.score_interpretability_metrics(quant_df_test)

    if not is_solution_interpretable(metrics):
        distance = solution_interpretability_distance(metrics)
        print(-distance)
        return -distance

    auc = ids.score_auc(quant_df_test)
    print(auc)

    return auc
def run_cba(Xtr, Ytr, Xt, Yt, lb, support=0.20, confidence=0.5, k=None, log=None):
    txns_train = TransactionDB.from_DataFrame(pd.concat([Xtr, Ytr], axis=1))
    txns_test = TransactionDB.from_DataFrame(pd.concat([Xt, Yt], axis=1))

    cba = CBA(support=support, confidence=confidence, algorithm="m1")
    cba.fit(txns_train)

    # Optionally truncate the classifier to its top-k rules.
    if k is not None:
        cba.clf.rules = cba.clf.rules[:k]

    Y_pred = [int(i) for i in cba.predict(txns_test)]

    # Record which training transactions each rule covers.
    for r in cba.clf.rules:
        r.covered = set(i for i, rd in enumerate(txns_train) if r.antecedent <= rd)

    if log is None:
        from logger import log

    log('cba-k', len(cba.clf.rules))
    log('cba-rules', str(cba.clf.rules))
    for i, r in enumerate(cba.clf.rules):
        log('cba-nconds', len(r), i)
    log('cba-auc', roc_auc_score(lb.transform(Yt.values), lb.transform(Y_pred)))
    log('cba-bacc', balanced_accuracy_score(Yt, Y_pred))
    log('cba-disp', dispersion_(cba.clf.rules, average=True))
    log('cba-overlap', overlap(cba.clf.rules))

    print(confusion_matrix(Yt, Y_pred))
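# A minimal usage sketch for run_cba (not from the original script). It assumes a
# discretized DataFrame `df` whose last column is an integer-coded class label
# (run_cba casts predictions with int()), and uses scikit-learn's LabelBinarizer
# for the `lb` argument expected above.
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelBinarizer

X, Y = df.iloc[:, :-1], df.iloc[:, -1]
Xtr, Xt, Ytr, Yt = train_test_split(X, Y, test_size=0.2, random_state=0)

lb = LabelBinarizer().fit(Y)
run_cba(Xtr, Ytr, Xt, Yt, lb, support=0.2, confidence=0.5, k=10)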
def test_fitting(self):
    cba = CBA()

    test_dataframe = pd.read_csv(dataset_file, sep=",")
    transactions = TransactionDB.from_DataFrame(test_dataframe)

    cba.fit(transactions)
def test_predict_probability(self):
    cba = CBA(algorithm="m2")

    test_dataframe = pd.read_csv(dataset_file, sep=",")
    transactions = TransactionDB.from_DataFrame(test_dataframe)
    transactions_test = TransactionDB.from_DataFrame(test_dataframe[:2])

    cba.fit(transactions)

    cba.predict_probability(transactions_test)
def test_predict_probablity(self):
    header1 = ["A", "B", "Y"]
    rows1 = [
        [1, 1, 0],
        [1, 1, 0],
        [1, 1, 1],
        [0, 0, 0],
        [0, 0, 1],
        [0, 0, 1],
    ]

    transactions = TransactionDB(rows1, header1)

    cba = CBA()
    cba.fit(transactions)

    probs = cba.clf.predict_probability_all(transactions)
def test_accuracy(self):
    expected_accuracy = 0.5

    cba = CBA(algorithm="m2")

    test_dataframe = pd.read_csv(dataset_file, sep=",")
    transactions = TransactionDB.from_DataFrame(test_dataframe)
    transactions_test = TransactionDB.from_DataFrame(test_dataframe[:2])

    cba.fit(transactions)

    accuracy = cba.rule_model_accuracy(transactions_test)

    self.assertAlmostEqual(accuracy, expected_accuracy, places=3)
def test_predict_probability_works(self):
    cba = CBA(algorithm="m1")

    test_dataframe = pd.read_csv(dataset_file, sep=",")
    transactions = TransactionDB.from_DataFrame(test_dataframe)
    transactions_test = TransactionDB.from_DataFrame(test_dataframe[:2])

    cba.fit(transactions)

    probabilities = cba.predict_probability(transactions_test)
    matched_rules = cba.predict_matched_rules(transactions_test)

    # The predicted probability of each test transaction should equal the
    # confidence of the rule that matched it.
    for idx in range(len(probabilities)):
        self.assertEqual(probabilities[idx], matched_rules[idx].confidence)
def test_target_class_works(self):
    cba = CBA(algorithm="m2")

    test_dataframe = pd.read_csv(dataset_file, sep=",")
    transactions = TransactionDB.from_DataFrame(test_dataframe, target="Gender")

    cba.fit(transactions)

    rules = cba.clf.rules
    rule0 = rules[0]

    self.assertEqual(rule0.consequent[0], "Gender")
def test_rule_class_label_works(self):
    cba = CBA(algorithm="m2")

    test_dataframe = pd.read_csv(dataset_file, sep=",")
    transactions = TransactionDB.from_DataFrame(test_dataframe)

    cba.fit(transactions)

    rules = cba.clf.rules
    rule0 = rules[0]

    self.assertEqual(rule0.consequent[0], test_dataframe.columns.values[-1])
def test_inspect(self):
    cba = CBA()

    test_dataframe = pd.read_csv(dataset_file, sep=";")
    transactions = TransactionDB.from_DataFrame(test_dataframe)

    cba.fit(transactions)

    clf = cba.clf
    inspect_df = clf.inspect()

    # inspect() should return a DataFrame with one row per rule plus the default
    # rule, whose antecedent ("lhs") is empty.
    self.assertEqual(type(inspect_df), pd.DataFrame)
    self.assertEqual(len(inspect_df), len(clf.rules) + 1)
    self.assertEqual(inspect_df["lhs"].iloc[-1], "{}")
def misuse(train_df, test_df):
    print("\nMisuse Data")

    train = train_df.copy()
    test = test_df.copy()

    # Misuse detection is trained on attack traffic only, so drop benign rows.
    train = train[train['Label'] != 'Benign']

    txns_train = TransactionDB.from_DataFrame(train, target="Label")
    txns_test = TransactionDB.from_DataFrame(test)

    print("Association Rule Generation")
    cba = CBA(support=0.01, confidence=0.01)
    cba.fit(txns_train)

    predict = cba.predict(txns_test)
    test['predict'] = predict

    return test
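# A hedged usage sketch for misuse() (the file names are assumptions, not taken
# from the original script): both DataFrames are expected to be discretized and
# to contain a 'Label' column in which 'Benign' marks normal traffic.
train_df = pd.read_csv('train_flows.csv')   # hypothetical input files
test_df = pd.read_csv('test_flows.csv')

labeled_test = misuse(train_df, test_df)
print(labeled_test['predict'].value_counts())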
def fmax(param_dict):
    print(param_dict)

    support, confidence = param_dict["support"] / 1000, param_dict["confidence"] / 1000
    print(dict(support=support, confidence=confidence))

    cba = CBA(support=support, confidence=confidence)
    cba.fit(txns)
    cba_clf = cba.clf

    ids = IDS()
    ids_clf = IDSClassifier(IDSRuleSet.from_cba_rules(cba_clf.rules).ruleset)
    ids_clf.quant_dataframe_train = quant_df
    ids_clf.calculate_default_class()
    ids.clf = ids_clf

    auc = ids.score_auc(quant_df_test)
    print(auc)

    return auc
def train_model(data, support, confidence, rule_length):
    print('Mining rules...')

    headers = data.columns
    data = TransactionDB.from_DataFrame(data)

    cba = CBA(support=support, confidence=confidence, maxlen=rule_length, algorithm="m1")
    cba.fit(data)

    # Note: accuracy is measured on the training transactions themselves.
    accuracy = cba.rule_model_accuracy(data)

    print('Determining semantic coherence...')
    semantics.precompute_attributes(headers)

    for rule in cba.clf.rules:
        antecedent = list(rule.antecedent.itemset.keys())
        coherence = semantics.get_rule_coherence(antecedent)
        mined_rules.append((f'{rule_to_em_rule(rule)}',
                            rule.confidence,
                            coherence,
                            rule.consequent.value))

    print_model()
    print(f'Model accuracy: {round(accuracy * 100, 2)} %')
    print(f'Model support: {cba.support}')
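# A minimal usage sketch for train_model (not part of the original module). It
# assumes a pre-discretized DataFrame whose last column is the class label, and
# that the module-level helpers used above (semantics, mined_rules, rule_to_em_rule,
# print_model) are already available; the file name is hypothetical.
df = pd.read_csv('discretized_dataset.csv')
train_model(df, support=0.2, confidence=0.6, rule_length=3)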
        support=(1, 999),
        confidence=(1, 999),
    ),
    ternary_search_precision=5,
    max_iterations=1,
    extension_precision=-1,
    func_args_extension=dict(support=0, confidence=0))

best_pars = coord_asc.fit()
print("best_pars:", best_pars)

support, confidence = best_pars[0] / 1000, best_pars[1] / 1000

cba = CBA(support=support, confidence=confidence)
cba.fit(txns)
cba_clf = cba.clf

ids = IDS()
ids_clf = IDSClassifier(IDSRuleSet.from_cba_rules(cba_clf.rules).ruleset)
ids_clf.quant_dataframe_train = quant_df
ids_clf.calculate_default_class()
ids.clf = ids_clf

data = dict(dataset_name=dataset_name,
            algorithm="pyARC",
            auc=ids.score_auc(quant_df_test, order_type="cba"),
            rule_cutoff=rule_cutoff)
import pandas as pd
from pyarc import CBA, TransactionDB
from sklearn.model_selection import train_test_split

print("")
print("Rule Generation")

data = pd.read_csv('total_data.csv')  # disc_data/Known_attack_data
print("Data Size : %s" % str(data.shape))

# Split the data into train and test sets.
train, test = train_test_split(data, test_size=0.2, random_state=123)

txns_train = TransactionDB.from_DataFrame(train, target="class")
txns_test = TransactionDB.from_DataFrame(test)

print("Association Rule Generation")
cba = CBA(support=0.1)
cba.fit(txns_train)

print("\nRULES : ({})".format(len(cba.clf.rules)))
for i in cba.clf.rules:
    print(i)

accuracy = cba.rule_model_accuracy(txns_test)
print("")
print(accuracy)
benchmark_list = []

for dataset_filename in dataset_files:
    print(dataset_filename)

    df_train = pd.read_csv(os.path.join(dataset_path_train, dataset_filename))
    df_test = pd.read_csv(os.path.join(dataset_path_test, dataset_filename))

    txns_train = TransactionDB.from_DataFrame(df_train)
    txns_test = TransactionDB.from_DataFrame(df_test)

    quant_df_train = QuantitativeDataFrame(df_train)
    quant_df_test = QuantitativeDataFrame(df_test)

    cba = CBA(support=0.1, confidence=0.1)
    cba.fit(txns_train)

    rules = cba.clf.rules
    ids_ruleset = IDSRuleSet.from_cba_rules(rules)

    # Wrap the CBA rules in an IDS classifier so the IDS interpretability
    # metrics can be computed for them.
    ids = IDS()
    ids.clf = IDSClassifier(ids_ruleset.ruleset)
    ids.clf.default_class = cba.clf.default_class

    metrics_dict = ids.score_interpretability_metrics(quant_df_test)

    benchmark_dict = dict(dataset_filename=dataset_filename, algorithm="cba")
    benchmark_dict.update(metrics_dict)
    benchmark_list.append(benchmark_dict)

    print(benchmark_dict)
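# One plausible continuation (an assumption, not taken from the original fragment,
# which ends at the print call above): aggregate the per-dataset interpretability
# metrics collected in benchmark_list into a single table and save it.
benchmark_df = pd.DataFrame(benchmark_list)
benchmark_df.to_csv("cba_interpretability_benchmark.csv", index=False)  # hypothetical output path
print(benchmark_df)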