Пример #1
0
import pandas as pd
import numpy as np
from pyids import IDS
from pyids.algorithms import mine_CARs
from pyids.data_structures import IDSRuleSet
import json
from pyarc.qcba.data_structures import QuantitativeDataFrame

import random
import logging
import time

# Emit log records of level INFO and above.
logging.basicConfig(level=logging.INFO)

# The CSV path is relative, so it resolves against the current working
# directory of the process running this script.
df = pd.read_csv("../../../data/iris0.csv")
# The positional 10 is presumably the rule cutoff (other call sites pass
# ``rule_cutoff=`` by keyword) -- TODO confirm against mine_CARs.
cars = mine_CARs(df, 10, sample=False)
# Convert the mined rules and keep only the ``ruleset`` attribute.
ids_ruleset = IDSRuleSet.from_cba_rules(cars).ruleset

# Wrap the same frame for the pyarc/pyids APIs that take a
# QuantitativeDataFrame.
quant_dataframe = QuantitativeDataFrame(df)


def generate_lambda_arr(one_idx):
    """Build the zero padding that surrounds position ``one_idx``.

    Two lists of zeros are prepared: one with ``one_idx`` entries and one
    with ``total_params - one_idx - 1`` entries, so that together with a
    single extra element they would span ``total_params`` slots.

    Args:
        one_idx: Index of the slot the padding is built around.

    NOTE(review): the code visible here only builds the two padding lists
    and has no ``return``; presumably they are joined around one non-zero
    entry further down -- confirm against the complete function.
    """
    # Presumably the number of lambda weights taken by IDS -- TODO confirm.
    total_params = 7

    # ``0 * [0]`` already evaluates to ``[]``, so both branches yield the
    # same value when ``one_idx`` is 0.
    if one_idx == 0:
        start_arr = []
    else:
        start_arr = one_idx * [0]

    # Zeros for every slot after ``one_idx``.
    end_arr = (total_params - one_idx - 1) * [0]
Пример #2
0
    data_count = df.shape[0]

    quant_df = QuantitativeDataFrame(df)

    for algorithm in ["DLS", "SLS", "DUSM", "RUSM"]:
        durations = []

        for _ in range(time_estimation_iterations):
            print(_)
            lambda_array = generate_lambda_array()

            print(f"data count: {data_count}")
            print(f"algorithm: {algorithm}")
            print(f"using lambda: {lambda_array}")

            cars = mine_CARs(df, rule_cutoff=rule_cutoff)

            ids = IDS(algorithm=algorithm)
            start = time.time()
            ids.fit(class_association_rules=cars,
                    quant_dataframe=quant_df,
                    lambda_array=lambda_array)
            duration = time.time() - start

            print(f"avg duration: {duration}")

            durations.append(duration)

        duration = np.mean(durations)

        print(f"avg duration: {duration}")
Пример #3
0
from pyids.model_selection.random_search import RandomSearch
from pyids.algorithms.ids import IDS
from pyids.algorithms import mine_CARs

from pyarc.qcba.data_structures import QuantitativeDataFrame

import pandas as pd
import numpy as np

# The CSV path is relative, so it resolves against the current working
# directory of the process running this script.
df_iris = pd.read_csv("../../../data/iris0.csv")
# Wrap the frame for the APIs that take a QuantitativeDataFrame.
quant_df = QuantitativeDataFrame(df_iris)
# The positional 40 is presumably the rule cutoff (other call sites pass
# ``rule_cutoff=`` by keyword) -- TODO confirm against mine_CARs.
cars = mine_CARs(df_iris, 40)


def is_solution_interpretable(metrics):
    """Return whether ``metrics`` satisfies every interpretability threshold.

    The metrics are printed before being checked.

    Args:
        metrics: Mapping that provides the keys ``fraction_overlap``,
            ``fraction_classes``, ``fraction_uncovered``,
            ``average_rule_width`` and ``ruleset_length``.

    Returns:
        A truthy value when all five thresholds hold, a falsy one otherwise.

    Raises:
        KeyError: If one of the metric keys that gets evaluated is missing.
    """
    print(metrics)
    return (metrics["fraction_overlap"] <= 0.3
            # Full class coverage corresponds to 1.0. The earlier strict
            # ``> 1.0`` test rejected it, which made the whole predicate
            # unsatisfiable for a metric bounded by 1 (bound assumed from
            # the metric's name -- confirm against pyids).
            and metrics["fraction_classes"] >= 1.0
            and metrics["fraction_uncovered"] <= 0.3
            and metrics["average_rule_width"] < 8
            and metrics["ruleset_length"] <= 10)


def solution_interpretability_distance(metrics):
    """Collect how far each metric in ``metrics`` lies beyond its target.

    Every component is clipped at 0, so a metric that meets its target
    contributes nothing.

    Args:
        metrics: Mapping that provides the keys ``fraction_overlap``,
            ``fraction_classes``, ``fraction_uncovered``,
            ``average_rule_width`` and ``ruleset_length``.

    NOTE(review): no ``return`` is visible after the vector is built;
    presumably the vector is reduced to a single distance further down --
    confirm against the complete function.
    """
    distance_vector = np.array([
        # Excess of the overlap fraction above 0.1.
        max(metrics["fraction_overlap"] - 0.1, 0),
        # Shortfall of the class fraction below 1.
        max(1 - metrics["fraction_classes"], 0),
        # Excess of the uncovered fraction above 0.15.
        max(metrics["fraction_uncovered"] - 0.15, 0),
        # Excess of the average rule width above 8.
        max(metrics["average_rule_width"] - 8, 0),
        # Excess of the rule count above 10.
        max(metrics["ruleset_length"] - 10, 0)
    ])
Пример #4
0
from pyids.model_selection import CoordinateAscent
from pyids.algorithms.ids import IDS
from pyids.algorithms import mine_CARs, mine_IDS_ruleset

from pyarc.qcba.data_structures import QuantitativeDataFrame

import pandas as pd
import numpy as np

# The CSV path is relative, so it resolves against the current working
# directory of the process running this script.
df_iris = pd.read_csv("../../../data/iris0.csv")
# Wrap the frame for the APIs that take a QuantitativeDataFrame.
quant_df = QuantitativeDataFrame(df_iris)
# The positional 30 is presumably the rule cutoff (other call sites pass
# ``rule_cutoff=`` by keyword) -- TODO confirm against mine_CARs.
cars = mine_CARs(df_iris, 30)

# Per-metric limits, keyed by metric name, that the interpretability check
# compares a rule set's metrics against.
interpretability_bounds = {
    "fraction_overlap": 0.1,
    "fraction_classes": 1,
    "fraction_uncovered": 0.35,
    "average_rule_width": 8,
    "ruleset_length": 10,
}


def is_solution_interpretable(metrics):
    print(metrics)
    return (metrics["fraction_overlap"] <=
            interpretability_bounds["fraction_overlap"]
            and metrics["fraction_classes"] >=
            interpretability_bounds["fraction_classes"]
            and metrics["fraction_uncovered"] <=
            interpretability_bounds["fraction_uncovered"]
            and metrics["average_rule_width"] <=
            interpretability_bounds["average_rule_width"]
            and metrics["ruleset_length"] <=
Пример #5
0
from pyids.algorithms import mine_CARs
from pyids.algorithms.ids_multiclass import IDSOneVsAll
from pyids.data_structures import IDSRuleSet

from pyarc.qcba.data_structures import QuantitativeDataFrame
from pyarc.data_structures import TransactionDB
from pyarc import CBA

import random
import logging
import time

import matplotlib.pyplot as plt

# NOTE(review): absolute, machine-specific Windows path -- the script only
# runs where this exact location exists.
df = pd.read_csv("c:/code/python/machine_learning/assoc_rules/train/iris0.csv")
# Mine class association rules with a rule cutoff of 40.
cars = mine_CARs(df, rule_cutoff=40)
# Wrap the frame for the APIs that take a QuantitativeDataFrame.
quant_dataframe = QuantitativeDataFrame(df)


def is_solution_interpretable(metrics):
    """Return whether ``metrics`` satisfies every interpretability threshold.

    The metrics are printed before being checked.

    Args:
        metrics: Mapping that provides the keys ``fraction_overlap``,
            ``fraction_classes``, ``fraction_uncovered``,
            ``average_rule_width`` and ``ruleset_length``.

    Returns:
        A truthy value when all five thresholds hold, a falsy one otherwise.

    Raises:
        KeyError: If one of the metric keys that gets evaluated is missing.
    """
    print(metrics)
    return (
        metrics["fraction_overlap"] <= 0.10 and
        # Full class coverage corresponds to 1.0. The earlier strict
        # ``> 1.0`` test rejected it, which made the whole predicate
        # unsatisfiable for a metric bounded by 1 (bound assumed from the
        # metric's name -- confirm against pyids).
        metrics["fraction_classes"] >= 1.0 and
        metrics["fraction_uncovered"] <= 0.15 and
        metrics["average_rule_width"] < 8 and
        metrics["ruleset_length"] <= 10
    )

def solution_interpretability_distance(metrics):
    distance_vector = np.array([
Пример #6
0
from pyids.model_selection import CoordinateAscent
from pyids.algorithms.ids import IDS
from pyids.algorithms import mine_CARs, mine_IDS_ruleset

from pyarc.qcba.data_structures import QuantitativeDataFrame

import pandas as pd
import numpy as np

# The CSV path is relative, so it resolves against the current working
# directory of the process running this script.
df_iris = pd.read_csv("../../../data/iris0.csv")
# Wrap the frame for the APIs that take a QuantitativeDataFrame.
quant_df = QuantitativeDataFrame(df_iris)
# The positional 20 is presumably the rule cutoff (other call sites pass
# ``rule_cutoff=`` by keyword) -- TODO confirm against mine_CARs.
cars = mine_CARs(df_iris, 20)

# Per-metric limits, keyed by metric name, that the interpretability check
# compares a rule set's metrics against.
interpretability_bounds = {
    "fraction_overlap": 0.1,
    "fraction_classes": 1,
    "fraction_uncovered": 0.35,
    "average_rule_width": 8,
    "ruleset_length": 10,
}


def is_solution_interpretable(metrics):
    print(metrics)
    return (metrics["fraction_overlap"] <=
            interpretability_bounds["fraction_overlap"]
            and metrics["fraction_classes"] >=
            interpretability_bounds["fraction_classes"]
            and metrics["fraction_uncovered"] <=
            interpretability_bounds["fraction_uncovered"]
            and metrics["average_rule_width"] <=
            interpretability_bounds["average_rule_width"]
            and metrics["ruleset_length"] <=
Пример #7
0
from pyids.algorithms.ids import IDS
from pyids.algorithms import mine_CARs
from pyarc.qcba.data_structures import QuantitativeDataFrame
from pyids.model_selection.coordinate_ascent import CoordinateAscent

# NOTE(review): ``lambda_dict`` is not referenced anywhere in the visible
# code; only ``lambda_array`` below is handed to ``fit``.
lambda_dict = {
    'l1': 124.16415180612711,
    'l2': 38.896662094192955,
    'l3': 557.0996799268405,
    'l4': 638.188385916781,
    'l5': 136.48056698673983,
    'l6': 432.1760402377687,
    'l7': 452.1563786008231
}
# Seven weights passed to ``IDS.fit`` as ``lambda_array``.
lambda_array = [
    665.9341563786008, 271.7242798353909, 212.34156378600824,
    20.489711934156375, 648.5761316872428, 911, 560
]

# NOTE(review): absolute, machine-specific Windows paths -- the script only
# runs where these exact locations exist.
df = pd.read_csv("C:/code/python/machine_learning/assoc_rules/train/iris0.csv")
quant_df = QuantitativeDataFrame(df)
# NOTE(review): the test frame is built but never used in the visible code.
quant_df_test = QuantitativeDataFrame(
    pd.read_csv("C:/code/python/machine_learning/assoc_rules/test/iris0.csv"))

# The positional 20 is presumably the rule cutoff -- TODO confirm against
# mine_CARs.
cars = mine_CARs(df, 20)

# Fit an IDS model with the "DUSM" algorithm on the training frame.
ids = IDS(algorithm="DUSM")
ids.fit(quant_df, cars, lambda_array=lambda_array)

# NOTE(review): AUC is computed on the training frame ``quant_df``; confirm
# whether ``quant_df_test`` was the intended argument.
print(ids.score_auc(quant_df))