# Module-level imports assumed for this code excerpt; turn_rules_to_df and
# simplifyRules (used further below) are helper functions defined elsewhere
# in the codebase.
import numpy as np
import pandas as pd

from aix360.algorithms.rbm import (BooleanRuleCG, FeatureBinarizer,
                                   LogisticRuleRegression)


def train_r_0(self):
    """
    Trains the rule model that outputs rules that predict for class 0.
    """
    # The rules generated predict for label 0.
    br_0 = BooleanRuleCG(CNF=False)
    br_0.fit(self.binarized_train_data, self.train_labels)
    self.r_0 = br_0
    return
def train_r_1(self):
    """
    Trains the rule model that outputs rules that predict for class 1.
    """
    # The rules generated predict for label 0, so we have to invert the
    # labels to generate rules that predict for label 1.
    br_1 = BooleanRuleCG(CNF=False)
    inverted_train_labels = []
    for label in self.train_labels:
        if label:
            inverted_train_labels.append(0)
        else:
            inverted_train_labels.append(1)
    br_1.fit(self.binarized_train_data, np.array(inverted_train_labels))
    self.r_1 = br_1
    return
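# A minimal usage sketch (illustrative only): `explainer` is a hypothetical
# instance of the surrounding class that already holds `binarized_train_data`
# and `train_labels`. Once trained, each BooleanRuleCG model exposes its rule
# set through explain(), which returns a dict with a "rules" key:
#
#     explainer.train_r_0()
#     explainer.train_r_1()
#     rules_class_0 = explainer.r_0.explain()["rules"]
#     rules_class_1 = explainer.r_1.explain()["rules"]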
def aix360_rules_wrapper(
    df_anomalies,
    numerical_cols,
    categorical_cols,
    rule_algorithm="",
    simplify_rules=False,
    model_params={},
):
    """
    Extracts rules describing inliers and outliers using AIX360's
    BooleanRuleCG ("brlg") or LogisticRuleRegression ("logrr").

    Parameters
    ----------
    df_anomalies : pandas.DataFrame
        Data containing the feature columns and a "predictions" column in
        which positive values mark inliers and negative values mark outliers.
    numerical_cols : list of str
        Names of the numerical feature columns.
    categorical_cols : list of str
        Names of the categorical feature columns.
    rule_algorithm : str, optional
        Rule model to use: "brlg" (BooleanRuleCG) or "logrr"
        (LogisticRuleRegression). The default is "".
    simplify_rules : bool, optional
        Whether to prune the extracted rules with simplifyRules.
        The default is False.
    model_params : dict, optional
        Keyword arguments forwarded to the chosen rule model; missing
        hyperparameters are filled with defaults. The default is {}.

    Raises
    ------
    ValueError
        If rule_algorithm is neither "brlg" nor "logrr".

    Returns
    -------
    df_rules_inliers : pandas.DataFrame
        One rule per row describing the inliers, with per-feature bounds,
        the rule size ("size_rules") and "rule_prediction" = 1.
    df_rules_outliers : pandas.DataFrame
        One rule per row describing the outliers, with per-feature bounds,
        the rule size ("size_rules") and "rule_prediction" = -1.
    """
    # Work on a copy so neither the caller's dict nor the mutable default
    # argument is modified when defaults are filled in below.
    model_params = dict(model_params)

    # Define variables
    feature_cols = numerical_cols + categorical_cols
    X = df_anomalies[feature_cols].astype(float)
    y = df_anomalies["predictions"].astype(int)
    y_inliers = np.array([x if x > 0 else 0 for x in y])  # 1 for inliers (positive predictions)
    y_outliers = np.array([1 if x < 0 else 0 for x in y])  # 1 for outliers (negative predictions)

    # Feature binarization (keyword names follow aix360's FeatureBinarizer)
    fb = FeatureBinarizer(negations=True,
                          returnOrd=True,
                          colCateg=categorical_cols,
                          numThresh=90)
    X_fb, X_std = fb.fit_transform(X)

    # Choose model
    if rule_algorithm == "brlg":
        # Default params
        if "lambda0" not in model_params:
            model_params["lambda0"] = 1e-3
        if "lambda1" not in model_params:
            model_params["lambda1"] = 1e-3
        if "CNF" not in model_params:
            model_params["CNF"] = False

        # Inliers
        model_rules = BooleanRuleCG(**model_params)
        model_rules.fit(X_fb, y_inliers)
        list_rules_inliers = model_rules.explain()["rules"]

        # Outliers
        model_rules = BooleanRuleCG(**model_params)
        model_rules.fit(X_fb, y_outliers)
        list_rules_outliers = model_rules.explain()["rules"]

    elif rule_algorithm == "logrr":
        # Default params
        if "lambda0" not in model_params:
            model_params["lambda0"] = 0.005
        if "lambda1" not in model_params:
            model_params["lambda1"] = 0.001

        # Obtain rules [Inliers]
        model_rules = LogisticRuleRegression(**model_params)
        model_rules.fit(X_fb, y_inliers, X_std)
        df_rules = model_rules.explain()
        try:
            # Inliers
            df_rules_inliers = df_rules[
                (df_rules["coefficient"] > 0)
                & (df_rules["rule/numerical feature"] != "(intercept)")]
            list_rules_inliers = list(
                df_rules_inliers["rule/numerical feature"])
            # Outliers
            df_rules_outliers = df_rules[
                (df_rules["coefficient"] < 0)
                & (df_rules["rule/numerical feature"] != "(intercept)")]
            list_rules_outliers = list(
                df_rules_outliers["rule/numerical feature"])
        except KeyError:
            # Inliers
            df_rules_inliers = df_rules[(df_rules["coefficient"] > 0)
                                        & (df_rules["rule"] != "(intercept)")]
            list_rules_inliers = list(df_rules_inliers["rule"])
            # Outliers
            df_rules_outliers = df_rules[(df_rules["coefficient"] < 0)
                                         & (df_rules["rule"] != "(intercept)")]
            list_rules_outliers = list(df_rules_outliers["rule"])
    else:
        raise ValueError(
            "Argument {0} not recognised -- use 'brlg' or 'logrr' instead"
            .format(rule_algorithm))

    # Turn the textual rules into DataFrames
    # (turn_rules_to_df is a helper defined elsewhere in this module)
    list_rules_inliers = [x.replace("AND", "&") for x in list_rules_inliers]
    list_rules_outliers = [x.replace("AND", "&") for x in list_rules_outliers]
    df_inliers = turn_rules_to_df(list_rules=list_rules_inliers,
                                  list_cols=feature_cols)
    df_outliers = turn_rules_to_df(list_rules=list_rules_outliers,
                                   list_cols=feature_cols)

    # Get rule size (number of conditions per rule)
    df_inliers = df_inliers.reset_index(drop=True)
    df_inliers["size_rules"] = [len(x.split("&")) for x in list_rules_inliers]
    df_outliers = df_outliers.reset_index(drop=True)
    df_outliers["size_rules"] = [
        len(x.split("&")) for x in list_rules_outliers
    ]

    # Prune rules (simplifyRules is a helper defined elsewhere in this module)
    if simplify_rules:
        if len(df_inliers) > 0:
            df_rules_pruned = simplifyRules(
                df_inliers.drop(columns=["size_rules"]), categorical_cols)
            df_rules_pruned = df_rules_pruned.reset_index().merge(
                df_inliers.reset_index()[["index", "size_rules"]], how="left")
            df_rules_pruned.index = df_rules_pruned["index"]
            df_rules_pruned = df_rules_pruned.drop(columns=["index"],
                                                   errors="ignore")
            df_rules_inliers = df_rules_pruned.copy()
            df_rules_inliers["rule_prediction"] = 1
        else:
            df_rules_inliers = pd.DataFrame()

        if len(df_outliers) > 0:
            df_rules_pruned = simplifyRules(
                df_outliers.drop(columns=["size_rules"]), categorical_cols)
            df_rules_pruned = df_rules_pruned.reset_index().merge(
                df_outliers.reset_index()[["index", "size_rules"]], how="left")
            df_rules_pruned.index = df_rules_pruned["index"]
            df_rules_pruned = df_rules_pruned.drop(columns=["index"],
                                                   errors="ignore")
            df_rules_outliers = df_rules_pruned.copy()
            df_rules_outliers["rule_prediction"] = -1
        else:
            df_rules_outliers = pd.DataFrame()
    else:
        df_rules_inliers = df_inliers
        df_rules_inliers["rule_prediction"] = 1
        df_rules_outliers = df_outliers
        df_rules_outliers["rule_prediction"] = -1

    return df_rules_inliers, df_rules_outliers
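# A minimal usage sketch (illustrative only; `df_toy` and its values are
# hypothetical): the "predictions" column holds 1 for inliers and -1 for
# outliers, mirroring the sign convention used by the function above.
#
#     import numpy as np
#     import pandas as pd
#
#     rng = np.random.default_rng(0)
#     df_toy = pd.DataFrame({"x1": rng.normal(size=200),
#                            "x2": rng.normal(size=200)})
#     df_toy["predictions"] = np.where(df_toy["x1"].abs() > 1.5, -1, 1)
#
#     df_rules_in, df_rules_out = aix360_rules_wrapper(
#         df_toy,
#         numerical_cols=["x1", "x2"],
#         categorical_cols=[],
#         rule_algorithm="brlg",
#         simplify_rules=False,
#     )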