def run_cba(Xtr, Ytr, Xt, Yt, lb, support=0.20, confidence=0.5, k=None, log=None):
    """Fit a CBA classifier on the training split and log test-set metrics.

    The feature and label frames of each split are concatenated column-wise
    and turned into ``TransactionDB`` objects. A ``CBA`` model (algorithm
    ``"m1"``) is fitted on the training transactions, optionally truncated to
    its first ``k`` rules, and used to predict the test transactions.

    Args:
        Xtr: Training features, concatenated with ``Ytr`` along axis 1.
        Ytr: Training labels.
        Xt: Test features, concatenated with ``Yt`` along axis 1.
        Yt: Test labels; ``Yt.values`` is passed to ``lb.transform``.
        lb: Object whose ``transform`` is applied to both the true and the
            predicted labels before computing the ROC AUC.
        support: Forwarded to ``CBA``.
        confidence: Forwarded to ``CBA``.
        k: When not ``None``, only the first ``k`` rules are kept.
        log: Callable used as ``log(name, value[, index])``. When ``None``,
            ``log`` is imported from the ``logger`` module.

    Returns:
        None. Results are emitted through ``log`` and the confusion matrix is
        printed.
    """
    txns_train = TransactionDB.from_DataFrame(pd.concat([Xtr, Ytr], axis=1))
    txns_test = TransactionDB.from_DataFrame(pd.concat([Xt, Yt], axis=1))

    model = CBA(support=support, confidence=confidence, algorithm="m1")
    model.fit(txns_train)
    if k is not None:
        model.clf.rules = model.clf.rules[:k]

    Y_pred = [int(label) for label in model.predict(txns_test)]

    # Attach to every rule the indices of the training transactions whose
    # items contain the rule's antecedent; this is set before the dispersion
    # and overlap measures below are computed.
    rules = model.clf.rules
    for rule in rules:
        rule.covered = {
            idx
            for idx, datacase in enumerate(txns_train)
            if rule.antecedent <= datacase
        }

    if log is None:
        from logger import log

    log('cba-k', len(rules))
    log('cba-rules', str(rules))
    for position, rule in enumerate(rules):
        log('cba-nconds', len(rule), position)

    y_true_bin = lb.transform(Yt.values)
    y_pred_bin = lb.transform(Y_pred)
    log('cba-auc', roc_auc_score(y_true_bin, y_pred_bin))
    log('cba-bacc', balanced_accuracy_score(Yt, Y_pred))
    log('cba-disp', dispersion_(rules, average=True))
    log('cba-overlap', overlap(rules))

    print(confusion_matrix(Yt, Y_pred))
def misuse(train_df, test_df, support=0.01, confidence=0.01):
    """Train CBA on the non-benign training rows and label the test rows.

    Rows of ``train_df`` whose ``'Label'`` equals ``'Benign'`` are dropped,
    a ``CBA`` classifier is fitted on the remainder with ``'Label'`` as the
    target, and its predictions for ``test_df`` are stored in a new
    ``'predict'`` column.

    Args:
        train_df: Training frame; must contain a ``'Label'`` column.
        test_df: Frame to classify. It is not modified; a copy is returned.
        support: Forwarded to ``CBA``. The default matches the value that was
            previously hard-coded.
        confidence: Forwarded to ``CBA``. The default matches the value that
            was previously hard-coded.

    Returns:
        A copy of ``test_df`` with an added ``'predict'`` column holding the
        output of ``cba.predict``.
    """
    print("\nMisuse Data")

    # Boolean indexing already yields a new frame, so the caller's train_df
    # is left untouched without an explicit copy.
    train = train_df[train_df['Label'] != 'Benign']
    test = test_df.copy()

    txns_train = TransactionDB.from_DataFrame(train, target="Label")
    # NOTE(review): no target is passed here, unlike for the training frame;
    # confirm test_df's column layout is what TransactionDB expects.
    txns_test = TransactionDB.from_DataFrame(test)

    print("Association Rule Generation")
    cba = CBA(support=support, confidence=confidence)
    cba.fit(txns_train)

    test['predict'] = cba.predict(txns_test)
    return test