示例#1
0
    def _compute_lace_step(self,
                           encoded_instance,
                           k, target_class_frequency,
                           target_class_index, class_prob, single_attribute_differences):
        """Perform one LACE step for *encoded_instance*.

        Builds a local neighborhood of size ``k`` around the instance,
        mines L3 rules from it, computes the prediction difference for
        each rule's attribute subset, and measures how well those rules
        approximate the model.

        Returns a ``(difference_map, error)`` pair: ``difference_map``
        maps a comma-joined rule key to its prediction difference, and
        ``error`` is the approximation error of the rule set.
        """
        # Mine association rules from the locality of the instance with L3.
        local_l3 = L3Classifier(min_sup=self.min_sup)
        extracted_rules = _create_locality_and_get_rules(
            self.train_dataset, self.nbrs, encoded_instance, k,
            self.clf, local_l3)

        # Prediction difference of each rule's attribute subset, keyed by
        # the rule's comma-joined string representation.
        difference_map = {
            ",".join(map(str, rule)): _compute_prediction_difference_subset(
                self.train_dataset, encoded_instance, rule,
                self.clf, target_class_index)
            for rule in extracted_rules
        }

        # How well the extracted rules approximate the model's behavior.
        _, approx_error, _ = _compute_approximation_error(
            target_class_frequency,
            class_prob,
            single_attribute_differences,
            extracted_rules,
            difference_map)

        return difference_map, approx_error
示例#2
0
def test_save_load(dataset_X_y):
    """A fitted (and predicted) L3Classifier must survive both a joblib
    dump/load round-trip and a pickle round-trip.

    Serialized files are always removed afterwards so the test does not
    pollute the working directory, even when an assertion fails.
    """
    X, y = dataset_X_y
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.33,
                                                        random_state=42)
    clf = L3Classifier().fit(X_train, y_train)

    try:
        # test dump with joblib pre-predict
        dump(clf, "clf_pre_predict.joblib")
        y_pred = clf.predict(X_test)
        assert y_pred.shape[0] == X_test.shape[0]
        assert len(clf.labeled_transactions_) == X_test.shape[0]

        # test dump/load with joblib post-predict
        dump(clf, "clf.joblib")
        clf_l = load("clf.joblib")
        assert len(clf.lvl1_rules_) == len(clf_l.lvl1_rules_)

        # test dump/load with pickle
        with open("clf.pickle", "wb") as fp:
            pickle.dump(clf, fp)
        with open("clf.pickle", "rb") as fp:
            clf_l = pickle.load(fp)
        assert len(clf.lvl2_rules_) == len(clf_l.lvl2_rules_)
    finally:
        # clean up every serialized model, including on failure
        for fname in ("clf_pre_predict.joblib", "clf.joblib", "clf.pickle"):
            if os.path.exists(fname):
                os.remove(fname)
示例#3
0
def test_grid_search(dataset_X_y, get_param_grid):
    """Smoke-test that L3Classifier composes with sklearn's GridSearchCV.

    The previous version computed an unused train/test split (the search
    is fitted on the full data) and asserted nothing; the dead split is
    removed and the refit results are now checked.
    """
    X, y = dataset_X_y

    clf = GridSearchCV(L3Classifier(), get_param_grid, n_jobs=-1)
    clf.fit(X, y)

    # the search must expose a refitted best estimator and its CV score
    assert clf.best_estimator_ is not None
    assert clf.best_score_ is not None
    print(clf.best_estimator_)
    print(clf.best_score_)
示例#4
0
def test_training_files(dataset_X_y):
    """With remove_files=False, fitting must leave all of L3's training
    artifacts on disk under the run's token directory."""
    X, y = dataset_X_y
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.33,
                                                        random_state=42)
    clf = L3Classifier().fit(X_train, y_train, remove_files=False)
    token = clf.current_token_
    leftovers = [
        name for name in os.listdir(token)
        if name.startswith(f"{token}")
    ]
    assert len(leftovers) == 7  # all the stuff left by L3
示例#5
0
def test_save_human_readable(dataset_X_y):
    """With save_human_readable=True, fitting must leave exactly the two
    human-readable rule files (level 1 and level 2) on disk."""
    X, y = dataset_X_y
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.33,
                                                        random_state=42)
    clf = L3Classifier().fit(X_train, y_train, save_human_readable=True)
    token = clf.current_token_
    readable = [
        name for name in os.listdir(token)
        if name.startswith(f"{token}")
    ]
    assert len(readable) == 2  # level 1 and level 2
示例#6
0
def test_fit_predict(dataset_X_y):
    """Fit on the training split, predict the test split, and check that
    prediction shapes and labeled-transaction bookkeeping line up."""
    X, y = dataset_X_y
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.33,
                                                        random_state=42)
    clf = L3Classifier().fit(X_train, y_train)
    predictions = clf.predict(X_test)
    n_test = X_test.shape[0]
    assert predictions.shape[0] == n_test
    assert len(clf.labeled_transactions_) == n_test
    sample = clf.labeled_transactions_[1]
    print(sample.matched_rules, sample.used_level)
    unclassified = [t for t in clf.labeled_transactions_ if t.used_level == -1]
    print(len(unclassified))
示例#7
0
def test_leve1_modifier(dataset_X_y):
    """With rule_sets_modifier='level1', no level-2 rules may be built
    and no transaction may be classified by a level-2 rule.

    NOTE: "leve1" in the function name is a typo for "level1"; it is kept
    so selecting the test by name keeps working.
    """
    X, y = dataset_X_y
    # only the test split is needed: the classifier is fitted on all of X
    _, X_test, _, _ = train_test_split(X,
                                       y,
                                       test_size=0.33,
                                       random_state=42)
    clf = L3Classifier(rule_sets_modifier='level1').fit(X, y)
    assert clf.n_lvl2_rules_ == 0 and len(clf.lvl2_rules_) == 0

    y_pred = clf.predict(X_test)
    assert y_pred.shape[0] == X_test.shape[0]
    assert len(clf.labeled_transactions_) == X_test.shape[0]
    print(clf.labeled_transactions_[1].matched_rules,
          clf.labeled_transactions_[1].used_level)
    # nothing was labeled at level 2
    assert len([t for t in clf.labeled_transactions_
                if t.used_level == 2]) == 0