Exemplo n.º 1
0
	def train_r_0(self):
		"""
		Fit the rule model associated with class 0.

		A ``BooleanRuleCG`` created with ``CNF=False`` is fitted on
		``self.binarized_train_data`` against the unmodified
		``self.train_labels``; the fitted model is stored in ``self.r_0``.
		"""
		# NOTE(review): the original author states that the learned rules
		# predict for label 0 when fitted on the raw labels -- confirm this
		# against the BooleanRuleCG documentation.
		rule_model = BooleanRuleCG(CNF=False)
		rule_model.fit(self.binarized_train_data, self.train_labels)

		# Only publish the model once fitting has completed.
		self.r_0 = rule_model
Exemplo n.º 2
0
	def train_r_1(self):
		"""
		Fit the rule model associated with class 1.

		The training labels are flipped (truthy -> 0, falsy -> 1) before a
		``BooleanRuleCG`` created with ``CNF=False`` is fitted on
		``self.binarized_train_data``; the fitted model is stored in
		``self.r_1``.
		"""
		# NOTE(review): per the original author, the learner produces rules
		# for label 0, so we have to invert the labels to obtain rules that
		# predict for label 1 -- confirm against the BooleanRuleCG docs.
		rule_model = BooleanRuleCG(CNF=False)
		flipped_labels = np.array(
			[0 if label else 1 for label in self.train_labels]
		)

		rule_model.fit(self.binarized_train_data, flipped_labels)

		# Only publish the model once fitting has completed.
		self.r_1 = rule_model
Exemplo n.º 3
0
def _rules_to_frame(list_rules, feature_cols):
    """Turn rule strings into a DataFrame and record each rule's size."""
    # Downstream parsing works with "&" as the conjunction symbol.
    list_rules = [rule.replace("AND", "&") for rule in list_rules]
    df_rules = turn_rules_to_df(list_rules=list_rules, list_cols=feature_cols)
    df_rules = df_rules.reset_index(drop=True)
    # Rule size = number of "&"-separated conditions in the rule string.
    df_rules["size_rules"] = [len(rule.split("&")) for rule in list_rules]
    return df_rules


def _prune_rules(df_rules, categorical_cols, rule_prediction):
    """Simplify rules with ``simplifyRules`` and tag them with a prediction."""
    if len(df_rules) == 0:
        # Nothing to simplify: return a bare, column-less DataFrame.
        return pd.DataFrame()

    df_pruned = simplifyRules(df_rules.drop(columns=["size_rules"]),
                              categorical_cols)
    # Re-attach the size of the originating rule through the row index.
    df_pruned = df_pruned.reset_index().merge(
        df_rules.reset_index()[["index", "size_rules"]], how="left")
    df_pruned.index = df_pruned["index"]
    df_pruned = df_pruned.drop(columns=["index"], errors="ignore").copy()
    df_pruned["rule_prediction"] = rule_prediction
    return df_pruned


def aix360_rules_wrapper(
    df_anomalies,
    numerical_cols,
    categorical_cols,
    rule_algorithm="",
    simplify_rules=False,
    model_params=None,
):
    """
    Extract inlier and outlier rules with ``BooleanRuleCG`` or
    ``LogisticRuleRegression``.

    Parameters
    ----------
    df_anomalies : pandas.DataFrame
        Data holding the feature columns plus a "predictions" column.
        After casting to int, predictions > 0 are treated as inliers and
        predictions < 0 as outliers.
    numerical_cols : list
        Names of the numerical feature columns.
    categorical_cols : list
        Names of the categorical feature columns (passed to the
        binarizer as ``colsCateg``).
    rule_algorithm : str, optional
        Either "brlg" (``BooleanRuleCG``) or "logrr"
        (``LogisticRuleRegression``). The default is "", which is rejected.
    simplify_rules : bool, optional
        If True, each rule set is passed through ``simplifyRules``.
        The default is False.
    model_params : dict, optional
        Keyword arguments for the rule model. Missing "lambda0" /
        "lambda1" (and "CNF" for "brlg") entries are filled with defaults
        on a private copy; the caller's dict is never modified.
        The default is None (no extra parameters).

    Raises
    ------
    ValueError
        If ``rule_algorithm`` is neither "brlg" nor "logrr". Raised before
        any data is processed.

    Returns
    -------
    df_rules_inliers : pandas.DataFrame
        Rules for inliers with "size_rules" and "rule_prediction" (= 1)
        columns. An empty, column-less DataFrame when ``simplify_rules``
        is True and no rule was produced.
    df_rules_outliers : pandas.DataFrame
        Same layout for outliers, with "rule_prediction" = -1.

    """
    # Fail fast on an unsupported algorithm, before any fitting work.
    if rule_algorithm not in ("brlg", "logrr"):
        raise ValueError(
            "Argument {0} not recognised -- use 'brlg' or 'logrr' instead"
            .format(rule_algorithm))

    # Work on a private copy so defaults never leak between calls or into
    # the caller's dictionary.
    params = dict(model_params or {})

    # Define variables
    feature_cols = numerical_cols + categorical_cols
    X = df_anomalies[feature_cols].astype(float)
    y = np.asarray(df_anomalies["predictions"].astype(int))
    # Binary targets: one labelling per class of interest.
    y_inliers = (y > 0).astype(int)
    y_outliers = (y < 0).astype(int)

    # Feature binarize
    fb = FeatureBinarizer(negations=True,
                          returnOrd=True,
                          colsCateg=categorical_cols,
                          numThres=90)
    X_fb, X_std = fb.fit_transform(X)

    if rule_algorithm == "brlg":
        params.setdefault("lambda0", 1e-3)
        params.setdefault("lambda1", 1e-3)
        params.setdefault("CNF", False)

        # One model per target: inliers first, then outliers.
        model_inliers = BooleanRuleCG(**params)
        model_inliers.fit(X_fb, y_inliers)
        list_rules_inliers = model_inliers.explain()["rules"]

        model_outliers = BooleanRuleCG(**params)
        model_outliers.fit(X_fb, y_outliers)
        list_rules_outliers = model_outliers.explain()["rules"]
    else:  # "logrr"
        params.setdefault("lambda0", 0.005)
        params.setdefault("lambda1", 0.001)

        # A single model is fitted on the inlier labels; the sign of each
        # rule's coefficient decides which side the rule belongs to.
        model_rules = LogisticRuleRegression(**params)
        model_rules.fit(X_fb, y_inliers, X_std)
        df_rules = model_rules.explain()

        # The explanation table may name the rule column either way.
        if "rule/numerical feature" in df_rules.columns:
            rule_col = "rule/numerical feature"
        else:
            rule_col = "rule"
        not_intercept = df_rules[rule_col] != "(intercept)"
        list_rules_inliers = list(
            df_rules[(df_rules["coefficient"] > 0) & not_intercept][rule_col])
        list_rules_outliers = list(
            df_rules[(df_rules["coefficient"] < 0) & not_intercept][rule_col])

    # Turn to DF and compute rule sizes
    df_rules_inliers = _rules_to_frame(list_rules_inliers, feature_cols)
    df_rules_outliers = _rules_to_frame(list_rules_outliers, feature_cols)

    # Prune rules
    if simplify_rules:
        return (
            _prune_rules(df_rules_inliers, categorical_cols, 1),
            _prune_rules(df_rules_outliers, categorical_cols, -1),
        )

    df_rules_inliers["rule_prediction"] = 1
    df_rules_outliers["rule_prediction"] = -1
    return df_rules_inliers, df_rules_outliers