def fit(self, rule_cutoff):
    """Train and score the classifier once per prepared train/test split.

    For every ``(train, test)`` pair yielded by ``self._prepare_dataframes()``
    rules are mined from the training frame, converted to CARs and cut down
    to the first ``rule_cutoff`` entries. ``self.classifier`` is then fitted
    on the training data and scored on the test data.

    Args:
        rule_cutoff: Upper bound on the number of CARs passed to the
            classifier for each split.

    Returns:
        A list with one score per split. The score comes from
        ``self.classifier.score_auc`` when ``self.score_auc`` is truthy and
        from ``self.classifier.score`` otherwise.
    """
    fold_scores = []
    for train_df, test_df in self._prepare_dataframes():
        transactions = TransactionDB.from_DataFrame(train_df)
        mined_rules = top_rules(
            transactions.string_representation,
            appearance=transactions.appeardict,
        )
        candidate_cars = createCARs(mined_rules)[:rule_cutoff]

        quant_train = QuantitativeDataFrame(train_df)
        quant_test = QuantitativeDataFrame(test_df)
        self.classifier.fit(quant_train, candidate_cars, debug=self.debug)

        # Pick the scoring method once, then apply it to the held-out data.
        if self.score_auc:
            scorer = self.classifier.score_auc
        else:
            scorer = self.classifier.score
        fold_scores.append(scorer(quant_test))
    return fold_scores
def fit(self, quant_dataframe, cars=None, rule_cutoff=30, lambda_array=None,
        class_name=None, debug=False, algorithm="SLS"):
    """Fit every per-class classifier on rules mined from its own data.

    The training frame is stored on the instance and handed to
    ``self._prepare`` (presumably what populates ``self.ids_classifiers`` --
    confirm against its definition). For each entry of
    ``self.ids_classifiers`` rules are mined from that entry's dataframe,
    converted to CARs, sorted in descending order and the first
    ``rule_cutoff`` of them are used to fit the entry's classifier.

    Args:
        quant_dataframe: Training data; stored as
            ``self.quant_dataframe_train``.
        cars: Accepted for interface compatibility only. It is never read:
            rules are always re-mined for each class.
        rule_cutoff: Maximum number of CARs given to each classifier.
        lambda_array: Weights forwarded to each classifier's ``fit``.
            ``None`` (the default) means ``7 * [1]``.
        class_name: Forwarded to ``self._prepare``.
        debug: Forwarded to each classifier's ``fit``.
        algorithm: Forwarded to each classifier's ``fit``.
    """
    if lambda_array is None:
        # Build the default per call: a list literal in the signature would
        # be a single object shared by every call and every classifier.
        lambda_array = 7 * [1]

    self.quant_dataframe_train = quant_dataframe
    self._prepare(quant_dataframe, class_name)

    for class_, clf_dict in self.ids_classifiers.items():
        print("training class:", class_)

        clf = clf_dict["clf"]
        class_quant_dataframe = clf_dict["quant_dataframe"]

        txns = TransactionDB.from_DataFrame(class_quant_dataframe.dataframe)
        rules = top_rules(txns.string_representation, appearance=txns.appeardict)

        class_cars = createCARs(rules)
        class_cars.sort(reverse=True)

        clf.fit(
            class_quant_dataframe,
            class_cars[:rule_cutoff],
            lambda_array=lambda_array,
            debug=debug,
            algorithm=algorithm,
        )
def mine_CARs(df, rule_cutoff, sample=False):
    """Mine class association rules from *df* and return at most *rule_cutoff*.

    Args:
        df: DataFrame passed to ``TransactionDB.from_DataFrame``.
        rule_cutoff: Maximum number of CARs to return.
        sample: When true, return a random sample of the mined CARs instead
            of the first ``rule_cutoff`` of them.

    Returns:
        A list of CARs. If fewer than ``rule_cutoff`` rules were mined, all
        of them are returned (in random order when ``sample`` is true).
    """
    txns = TransactionDB.from_DataFrame(df)
    rules = top_rules(txns.string_representation, appearance=txns.appeardict)
    cars = createCARs(rules)

    if sample:
        # random.sample raises ValueError when asked for more items than the
        # population holds; cap the size so sampling degrades the same way
        # the slice below does when few rules were mined.
        return random.sample(cars, min(rule_cutoff, len(cars)))

    return cars[:rule_cutoff]
def mine_rules(self, pandas_df, minsup=0.2):
    """Mine CARs from *pandas_df*, treating its last column as the class.

    Frequent itemsets are mined with ``self.mine_frequent_itemsets``, turned
    into rules by ``self._convert_to_fim_rules`` and wrapped by
    ``createCARs``. The resulting CARs are printed, then each one has its
    ``confidence`` overwritten with the value from
    ``self._calculate_rule_confidence``.

    Args:
        pandas_df: Input DataFrame; the last column holds the class labels.
        minsup: Minimum support forwarded to ``mine_frequent_itemsets``.

    Returns:
        The list of CARs with recomputed confidence.
    """
    itemsets = self.mine_frequent_itemsets(pandas_df, minsup)

    class_labels = list(pandas_df.iloc[:, -1].unique())
    class_column = pandas_df.columns[-1]
    raw_rules = self._convert_to_fim_rules(itemsets, class_labels, class_column)

    mined_cars = createCARs(raw_rules)
    print(mined_cars)

    for rule in mined_cars:
        rule.confidence = self._calculate_rule_confidence(rule, pandas_df)

    return mined_cars
def test_createCARs(self):
    """Check that createCARs maps raw rule tuples onto CAR attributes.

    Each input tuple is (consequent, antecedent items, support, confidence);
    the output is expected to keep the input order.
    """
    generated_rules = [
        ("Y:=:1", (), 0.5, 0.5),
        ("Y:=:0", (), 0.5, 0.5),
        ("Y:=:1", ("A:=:1",), 0.5, 1 / 3),
    ]

    cars = createCARs(generated_rules)

    # The first two rules have an empty antecedent and differ only in the
    # predicted class value.
    for position, class_value in enumerate((1, 0)):
        car = cars[position]
        assert car.consequent == Consequent("Y", class_value)
        assert car.confidence == 0.5
        assert car.support == 0.5

    # The third rule carries a single-item antecedent.
    last_car = cars[2]
    assert last_car.consequent == Consequent("Y", 1)
    assert last_car.antecedent == Antecedent([Item("A", 1)])
    assert last_car.confidence == 1 / 3
    assert last_car.support == 0.5
def mine_CARs(df, rule_cutoff, sample=False, random_seed=None, **top_rules_kwargs):
    """Mine class association rules from *df* and return at most *rule_cutoff*.

    Args:
        df: DataFrame passed to ``TransactionDB.from_DataFrame``.
        rule_cutoff: Maximum number of CARs to return.
        sample: When true, return a random sample of the mined CARs instead
            of the first ``rule_cutoff`` of them.
        random_seed: If not ``None``, seeds both ``random`` and
            ``numpy.random`` before mining. A seed of ``0`` is honoured.
        **top_rules_kwargs: Extra keyword arguments forwarded to
            ``top_rules``.

    Returns:
        A list of CARs. If fewer than ``rule_cutoff`` rules were mined, all
        of them are returned (in random order when ``sample`` is true).
    """
    # Compare against None rather than relying on truthiness: 0 is a valid
    # seed and must not be skipped.
    if random_seed is not None:
        random.seed(random_seed)
        np.random.seed(random_seed)

    txns = TransactionDB.from_DataFrame(df)
    rules = top_rules(
        txns.string_representation,
        appearance=txns.appeardict,
        **top_rules_kwargs,
    )
    cars = createCARs(rules)

    if sample:
        # random.sample raises ValueError when asked for more items than the
        # population holds; cap the size so sampling degrades the same way
        # the slice below does when few rules were mined.
        return random.sample(cars, min(rule_cutoff, len(cars)))

    return cars[:rule_cutoff]
# Demo script: mine class association rules from a discretised iris dataset
# after blanking out part of the data, and print the first ten rules.
import pandas as pd
import numpy as np

from pyarc.data_structures import TransactionDB
from pyarc.algorithms import top_rules, createCARs

df = pd.read_csv("./data/iris0.csv")

# Boolean-mask assignment overwrites every column of the matching rows, so
# each row whose sepallength equals this label becomes entirely NaN.
# np.nan is used because the np.NaN alias was removed in NumPy 2.0.
df[df["sepallength"] == "-inf_to_5.55"] = np.nan
print(df)

txns = TransactionDB.from_DataFrame(df)
rules = top_rules(txns.string_representation, appearance=txns.appeardict)
cars = createCARs(rules)

for car in cars[:10]:
    print(car)