Exemplo n.º 1
0
import numpy as np

from wildboar.datasets import load_dataset
from wildboar.ensemble import ShapeletForestClassifier
from wildboar.explain.counterfactual import counterfactuals

# Load TwoLeadECG from the UCR repository, keeping the train/test parts separate.
x_train, x_test, y_train, y_test = load_dataset(
    "TwoLeadECG", repository="wildboar/ucr", merge_train_test=False
)

# Train a 100-tree shapelet forest and report its accuracy on the test part.
clf = ShapeletForestClassifier(
    n_estimators=100, metric="euclidean", n_jobs=-1, random_state=1
)
clf.fit(x_train, y_train)
accuracy = clf.score(x_test, y_test)
print(accuracy)

# The second label in `clf.classes_` is the class the counterfactuals should
# be moved towards.
y_pred = clf.predict(x_test)
class_ = clf.classes_[1]
print("Class: %s" % class_)
print("Pred: %r" % y_pred)

# Keep at most ten test samples that are not already predicted as `class_`.
not_target = y_pred != class_
x_test, y_test = x_test[not_target][:10], y_test[not_target][:10]

x_counterfactual, success, score = counterfactuals(
    clf, x_test, class_, scoring="euclidean", random_state=123
)
Exemplo n.º 2
0
# Load the "Car" dataset and hold out 20% of the samples for testing.
# NOTE(review): `random_state` is not defined in the visible part of this
# snippet; it is presumably assigned earlier -- confirm.
x, y = load_dataset("Car")
x_train, x_test, y_train, y_test = train_test_split(x,
                                                    y,
                                                    test_size=0.2,
                                                    random_state=random_state)

# Two-step pipeline: a dense shapelet-forest embedding followed by a PCA
# that keeps two components.
f_embedding = make_pipeline(
    ShapeletForestEmbedding(sparse_output=False, random_state=random_state),
    PCA(n_components=2, random_state=random_state),
)
# The pipeline is fitted on the training split only and then applied to the
# test split.
f_embedding.fit(x_train)
x_embedding = f_embedding.transform(x_test)

# (display name, estimator) pairs to compare.
classifiers = [
    ("Shapelet forest", ShapeletForestClassifier(random_state=random_state)),
    ("Nearest neighbors", KNeighborsClassifier()),
]

# Distinct labels over the complete dataset (train and test together).
classes = np.unique(y)
n_classes = len(classes)

# Grid of axes: one row per classifier, one column per class label, with
# shared x and y axes.
fig, ax = plt.subplots(
    nrows=len(classifiers),
    ncols=n_classes,
    figsize=(3 * n_classes, 6),
    sharex=True,
    sharey=True,
)
# Fit every classifier on the training split.
for i, (name, clf) in enumerate(classifiers):
    clf.fit(x_train, y_train)
import numpy as np
from sklearn.model_selection import cross_val_score

from wildboar import datasets
from wildboar.ensemble import ExtraShapeletTreesClassifier, ShapeletForestClassifier

# Compare two shapelet ensembles on GunPoint using 10-fold cross-validation.
x, y = datasets.load_gun_point()

# Both ensembles are configured with the same hyper-parameters.
shared_params = {"n_estimators": 100, "n_jobs": -1, "metric": "scaled_euclidean"}
extra = ExtraShapeletTreesClassifier(**shared_params)
rsf = ShapeletForestClassifier(**shared_params)

# Evaluate the shapelet forest first, then the extra shapelet trees, and
# print the mean fold score of each.
for label, estimator in (("RSF", rsf), ("Extra", extra)):
    score = cross_val_score(estimator, x, y, cv=10)
    print(label, np.mean(score))
Exemplo n.º 4
0
    # etc...
    [[0, 1, 9, 1, 0], [0, 1, 2, 3, 4]],
    [[1, 2, 3, 0, 0], [0, 0, 0, 1, 2]],
    [[0, 0, -1, 0, 1], [1, 2, 3, 0, 1]],
]

# `x` is an array of shape `[5, 2, 5]`, i.e., 5 examples with 2
# dimensions consisting of 5 timesteps
# (the start of the list literal is outside the visible part of this snippet)
x = np.array(x, dtype=np.float64)
n_samples, n_dimensions, n_timesteps = x.shape

# `y` is the output target
y = np.array([0, 0, 1, 1, 0])

# Draw one random permutation of the sample indices from a seeded generator.
random_state = np.random.RandomState(123)
order = np.arange(n_samples)
random_state.shuffle(order)

# Apply the same permutation to the examples and to their labels so that
# they stay aligned.
x = x[order, :, :]
y = y[order]

# The same `RandomState` instance that produced the permutation is handed to
# the forest.
f = ShapeletForestClassifier(random_state=random_state, n_shapelets=1)
# NOTE(review): `c` holds a start timestamp but is not read in the visible
# part of this snippet -- confirm whether it is used further down.
c = time.time()
f.fit(x, y)

# Index of the (shuffled) example to inspect.
predict = 1
print("Predicting the class of example:")
print(x[predict, :].reshape(-1, n_dimensions, n_timesteps))
# The single example is reshaped to a batch of one before calling `predict`,
# and the first (only) prediction is printed.
print("The true class is:", y[predict], "and the predicted class is:",
      f.predict(x[predict, :].reshape(1, n_dimensions, -1))[0])
Exemplo n.º 5
0
from wildboar.ensemble import ShapeletForestClassifier
from wildboar.explain.counterfactual import counterfactuals

# One seed shared by the train/test split and by the seeded classifiers below.
random_state = 1234
x, y = load_dataset("GunPoint")

# Hold out 20% of the samples for testing.
x_train, x_test, y_train, y_test = train_test_split(x,
                                                    y,
                                                    test_size=0.2,
                                                    random_state=random_state)

# (title, estimator) pairs; each estimator is fitted in the loop below.
classifiers = [
    (
        "Shapelet Forest Counterfactuals",
        ShapeletForestClassifier(metric="euclidean",
                                 random_state=random_state,
                                 n_estimators=100),
    ),
    ("KNearest Counterfactuals", KNeighborsClassifier(metric="euclidean")),
    ("Prototype Counterfactuals",
     RandomForestClassifier(random_state=random_state)),
]

# Three stacked axes sharing the x axis (the list above has three entries).
fig, ax = plt.subplots(nrows=3, sharex=True)
# First label in sorted order among the training labels.
label = np.unique(y_train)[0]
for i, (name, clf) in enumerate(classifiers):
    clf.fit(x_train, y_train)
    # Test samples whose true label differs from `label`.
    x_test_sample = x_test[y_test != label]
    # Only the random forest gets the training data as background samples.
    # NOTE(review): the consumer of `kwargs` and the `else` branch are outside
    # the visible part of this snippet.
    if isinstance(clf, RandomForestClassifier):
        kwargs = {"background_x": x_train, "background_y": y_train}
    else:
Exemplo n.º 6
0
def counterfactuals_sf():
    """Run ``counterfactuals_plot`` for a shapelet forest classifier.

    Builds a ``ShapeletForestClassifier`` (Euclidean metric, all cores,
    ``random_state=123``) and returns the result of passing it to
    ``counterfactuals_plot``.
    """
    forest = ShapeletForestClassifier(
        metric="euclidean", n_jobs=-1, random_state=123
    )
    return counterfactuals_plot(forest)
Exemplo n.º 7
0
from sklearn.model_selection import train_test_split

from wildboar.datasets import load_dataset
from wildboar.ensemble import ShapeletForestClassifier
from wildboar.explain import IntervalImportance

# Load the dataset and hold out 30% of the samples for testing.
x, y = load_dataset("LargeKitchenAppliances")
x_train, x_test, y_train, y_test = train_test_split(x,
                                                    y,
                                                    test_size=0.3,
                                                    random_state=123)

# Shapelet forest with 100 estimators, one shapelet per split (`n_shapelets=1`)
# and the scaled Euclidean metric.
f = ShapeletForestClassifier(
    n_shapelets=1,
    n_estimators=100,
    n_jobs=-1,
    metric="scaled_euclidean",
    random_state=123,
)
f.fit(x_train, y_train)

# Interval importance configured with 72 intervals in the frequency domain,
# scored by accuracy.
i = IntervalImportance(
    n_interval=72,
    scoring="accuracy",
    domain="frequency",
    verbose=True,
    random_state=123,
)
# The importance is fitted against the trained forest using the held-out
# test split.
i.fit(f, x_test, y_test)
ax = i.plot(
    x_test,
def test_shapelet_forest_classifier():
    """Check that a seeded shapelet forest reproduces stored tree structure.

    Fits a ``ShapeletForestClassifier`` with 10 estimators on the predefined
    GunPoint training part (``wildboar/ucr-tiny`` repository) using
    ``random_state=1`` and compares, estimator by estimator, the
    ``tree_.left`` / ``tree_.right`` arrays and the split thresholds against
    the hard-coded expectations below.
    """
    x_train, x_test, y_train, y_test = load_dataset(
        "GunPoint", repository="wildboar/ucr-tiny", merge_train_test=False
    )
    clf = ShapeletForestClassifier(n_estimators=10, n_shapelets=10, random_state=1)
    clf.fit(x_train, y_train)
    # Expected `(left, right)` arrays, one pair per estimator, in the order of
    # `clf.estimators_`. Entries of -1 are excluded by the `> 0` masks used
    # in the assertions at the end (presumably they mark nodes without
    # children -- confirm against the tree implementation).
    branches = [
        (
            [1, -1, 3, 4, 5, -1, -1, 8, -1, 10, -1, -1, -1],
            [2, -1, 12, 7, 6, -1, -1, 9, -1, 11, -1, -1, -1],
        ),
        (
            [1, -1, 3, 4, -1, 6, -1, 8, -1, 10, -1, -1, -1],
            [2, -1, 12, 5, -1, 7, -1, 9, -1, 11, -1, -1, -1],
        ),
        (
            [1, 2, 3, 4, -1, -1, 7, -1, -1, 10, -1, 12, -1, -1, -1],
            [14, 9, 6, 5, -1, -1, 8, -1, -1, 11, -1, 13, -1, -1, -1],
        ),
        (
            [1, 2, 3, 4, -1, -1, -1, 8, -1, 10, -1, 12, -1, -1, -1],
            [14, 7, 6, 5, -1, -1, -1, 9, -1, 11, -1, 13, -1, -1, -1],
        ),
        (
            [1, 2, 3, -1, 5, -1, 7, -1, 9, -1, 11, -1, -1, -1, 15, -1, 17, -1, -1],
            [14, 13, 4, -1, 6, -1, 8, -1, 10, -1, 12, -1, -1, -1, 16, -1, 18, -1, -1],
        ),
        (
            [1, 2, -1, 4, 5, -1, 7, -1, -1, 10, -1, -1, 13, 14, -1, -1, -1],
            [12, 3, -1, 9, 6, -1, 8, -1, -1, 11, -1, -1, 16, 15, -1, -1, -1],
        ),
        (
            [1, 2, 3, 4, -1, -1, -1, 8, 9, -1, -1, 12, 13, -1, -1, -1, -1],
            [16, 7, 6, 5, -1, -1, -1, 11, 10, -1, -1, 15, 14, -1, -1, -1, -1],
        ),
        (
            [1, 2, -1, 4, 5, 6, 7, -1, -1, -1, -1, 12, -1, -1, -1],
            [14, 3, -1, 11, 10, 9, 8, -1, -1, -1, -1, 13, -1, -1, -1],
        ),
        (
            [1, 2, 3, -1, 5, -1, -1, 8, 9, 10, -1, 12, -1, -1, -1, -1, -1],
            [16, 7, 4, -1, 6, -1, -1, 15, 14, 11, -1, 13, -1, -1, -1, -1, -1],
        ),
        (
            [1, 2, -1, 4, -1, 6, -1, 8, -1, 10, -1, 12, -1, -1, -1],
            [14, 3, -1, 5, -1, 7, -1, 9, -1, 11, -1, 13, -1, -1, -1],
        ),
    ]

    # Expected thresholds, one `(left_threshold, right_threshold)` pair per
    # estimator. The first list is compared with the thresholds selected by
    # `tree_.left > 0`, the second with those selected by `tree_.right > 0`;
    # in the data below both lists of a pair hold the same values.
    thresholds = [
        (
            [
                3.728410228070656,
                11.127575591141072,
                7.383224794807461,
                7.109350684315213,
                0.9248183559076002,
                6.08675185469423,
            ],
            [
                3.728410228070656,
                11.127575591141072,
                7.383224794807461,
                7.109350684315213,
                0.9248183559076002,
                6.08675185469423,
            ],
        ),
        (
            [
                2.468504005311855,
                7.912505524900922,
                1.0551252327034113,
                0.8574299925766751,
                0.5760307808209804,
                0.009237440363224308,
            ],
            [
                2.468504005311855,
                7.912505524900922,
                1.0551252327034113,
                0.8574299925766751,
                0.5760307808209804,
                0.009237440363224308,
            ],
        ),
        (
            [
                3.909569808800988,
                2.821010442496668,
                1.8694668182965288,
                0.034583372931197384,
                0.8137102058624538,
                0.7560554810866997,
                2.713102595233928,
            ],
            [
                3.909569808800988,
                2.821010442496668,
                1.8694668182965288,
                0.034583372931197384,
                0.8137102058624538,
                0.7560554810866997,
                2.713102595233928,
            ],
        ),
        (
            [
                5.391042553752862,
                4.420547070721347,
                2.2716225008196576,
                0.6679258993537478,
                1.5471177855226528,
                1.2706259403508802,
                6.379381672446367,
            ],
            [
                5.391042553752862,
                4.420547070721347,
                2.2716225008196576,
                0.6679258993537478,
                1.5471177855226528,
                1.2706259403508802,
                6.379381672446367,
            ],
        ),
        (
            [
                2.784221806516613,
                3.9613021926565697,
                0.43050821107331483,
                1.3603965501478146,
                1.9817847740610532,
                0.557171910946499,
                0.023161212907754903,
                3.2040403820972045,
                0.25123702588573155,
            ],
            [
                2.784221806516613,
                3.9613021926565697,
                0.43050821107331483,
                1.3603965501478146,
                1.9817847740610532,
                0.557171910946499,
                0.023161212907754903,
                3.2040403820972045,
                0.25123702588573155,
            ],
        ),
        (
            [
                9.06314095909644,
                0.9301861459984877,
                1.2749535932250209,
                0.6602701901531287,
                0.3105779260645574,
                3.199344210068309,
                1.7444498163002922,
                0.9679068532147111,
            ],
            [
                9.06314095909644,
                0.9301861459984877,
                1.2749535932250209,
                0.6602701901531287,
                0.3105779260645574,
                3.199344210068309,
                1.7444498163002922,
                0.9679068532147111,
            ],
        ),
        (
            [
                10.684770463276237,
                1.0443634502866903,
                2.657944200018761,
                0.31997645008775166,
                8.506009151805937,
                2.5790890876760417,
                2.444351040739898,
                0.8797498982567451,
            ],
            [
                10.684770463276237,
                1.0443634502866903,
                2.657944200018761,
                0.31997645008775166,
                8.506009151805937,
                2.5790890876760417,
                2.444351040739898,
                0.8797498982567451,
            ],
        ),
        (
            [
                8.903669489275785,
                2.558013265746756,
                1.9352062567009694,
                0.6160338380839283,
                1.1133147922166846,
                2.6673841033247827,
                0.6693157414483296,
            ],
            [
                8.903669489275785,
                2.558013265746756,
                1.9352062567009694,
                0.6160338380839283,
                1.1133147922166846,
                2.6673841033247827,
                0.6693157414483296,
            ],
        ),
        (
            [
                2.9771351955856753,
                3.4048368843307957,
                2.847751510400112,
                1.2655496884627422,
                4.410184513977114,
                2.3116642536119203,
                0.5858765536466852,
                0.7586458184224343,
            ],
            [
                2.9771351955856753,
                3.4048368843307957,
                2.847751510400112,
                1.2655496884627422,
                4.410184513977114,
                2.3116642536119203,
                0.5858765536466852,
                0.7586458184224343,
            ],
        ),
        (
            [
                6.260659343273105,
                0.05120063347084325,
                0.678745571123132,
                5.913261089713139,
                0.25431501853894734,
                0.27996560751446015,
                0.7309024510514174,
            ],
            [
                6.260659343273105,
                0.05120063347084325,
                0.678745571123132,
                5.913261089713139,
                0.25431501853894734,
                0.27996560751446015,
                0.7309024510514174,
            ],
        ),
    ]

    # Walk the fitted estimators in lockstep with their expected structure
    # and thresholds; `zip` stops at the shortest of the three sequences.
    for estimator, (left, right), (left_threshold, right_threshold) in zip(
        clf.estimators_, branches, thresholds
    ):
        # The child-index arrays must match exactly.
        assert_equal(left, estimator.tree_.left)
        assert_equal(right, estimator.tree_.right)
        # Thresholds are compared approximately, and only at the positions
        # whose left (respectively right) entry is positive.
        assert_almost_equal(
            left_threshold, estimator.tree_.threshold[estimator.tree_.left > 0]
        )
        assert_almost_equal(
            right_threshold, estimator.tree_.threshold[estimator.tree_.right > 0]
        )
Exemplo n.º 9
0
import matplotlib.pylab as plt
import numpy as np

from wildboar.datasets import load_dataset
from wildboar.ensemble import ShapeletForestClassifier
from wildboar.explain.counterfactual import PrototypeCounterfactual

# Load TwoLeadECG from the UCR repository, keeping the train/test parts
# separate.
x_train, x_test, y_train, y_test = load_dataset("TwoLeadECG",
                                                repository="wildboar/ucr",
                                                merge_train_test=False)
# Keep references to the complete test data under separate names
# (presumably because `x_test` / `y_test` are reassigned later -- not visible
# in this snippet).
x_test_original = x_test
y_test_original = y_test

# Alternative estimators are left commented out; the shapelet forest is the
# one that is actually fitted.
# clf = KNeighborsClassifier(n_neighbors=5, metric="euclidean")
clf = ShapeletForestClassifier(n_estimators=100,
                               metric="scaled_euclidean",
                               random_state=10,
                               n_jobs=-1)
# clf = RandomForestClassifier()
clf.fit(x_train, y_train)

cf = PrototypeCounterfactual(
    train_x=x_train,
    train_y=y_train,
    metric="dtw",
    metric_params={"r": 0.1},
    method="nearest_shapelet",
    method_params={
        "min_shapelet_size": 0.1,
        "max_shapelet_size": 0.2
    },
    target=0.70,
Exemplo n.º 10
0
from sklearn.neighbors import KNeighborsClassifier

from wildboar.datasets import list_datasets, load_dataset
from wildboar.ensemble import ExtraShapeletTreesClassifier, ShapeletForestClassifier

# Seed shared by the two shapelet-based ensembles below.
random_state = 1234
# Display name -> estimator to evaluate.
classifiers = {
    "Nearest neighbors":
    KNeighborsClassifier(
        n_neighbors=1,
        metric="euclidean",
    ),
    "Shapelet forest":
    ShapeletForestClassifier(
        n_shapelets=10,
        metric="scaled_euclidean",
        random_state=random_state,
        n_jobs=-1,
    ),
    "Extra shapelet trees":
    ExtraShapeletTreesClassifier(
        metric="scaled_euclidean",
        n_jobs=-1,
        random_state=random_state,
    ),
}

# Names of all datasets available in the chosen repository.
repository = "wildboar/ucr-tiny"
datasets = list_datasets(repository)
# Float table with one row per dataset and one column per classifier.
# NOTE(review): `pd` is not imported in the visible part of this snippet.
df = pd.DataFrame(columns=classifiers.keys(), index=datasets, dtype=float)
# NOTE(review): only the progress print is visible here; the rest of the
# loop body, if any, is cut off.
for dataset in datasets:
    print(dataset)
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier

from wildboar.datasets import load_dataset
from wildboar.ensemble import ShapeletForestClassifier
from wildboar.explain.counterfactual import counterfactuals
from wildboar.linear_model import RocketClassifier

# Load GunPoint and split it into train and test parts with a fixed seed.
x, y = load_dataset("GunPoint")
x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=123)

# Build the three models up front; they are fitted and scored in this order.
rocket = RocketClassifier(n_kernels=1000, n_jobs=-1, random_state=123)
sf = ShapeletForestClassifier(n_shapelets=10, n_jobs=-1, random_state=123)
nearest = KNeighborsClassifier(n_jobs=-1)

for label, model in (
    ("Rocket score", rocket),
    ("Shapelet score", sf),
    ("Neighbors score", nearest),
):
    model.fit(x_train, y_train)
    print(label, model.score(x_test, y_test))

# The first two test samples whose true label is 1.0.
x_test_cls = x_test[y_test == 1.0][:2]

for method in [rocket, sf, nearest]:
    print(method)
    x_counter, x_success = counterfactuals(
        method,
        x_test_cls,
        2.0,
Exemplo n.º 12
0
    # Fit a single shapelet tree with uniform sample weights that sum to one,
    # print its structure and its score on the data it was fitted on.
    tree = ShapeletTreeClassifier(random_state=10, metric="scaled_dtw")
    tree.fit(x, y, sample_weight=np.ones(x.shape[0]) / x.shape[0])
    print_tree(tree.root_node_)

    print("Score")
    print(tree.score(x, y))
    print("score_done")

    # Comma-separated files: the first column is the class label, the
    # remaining columns are the time series values.
    train = np.loadtxt("data/synthetic_control_TRAIN", delimiter=",")
    test = np.loadtxt("data/synthetic_control_TEST", delimiter=",")

    y = train[:, 0].astype(np.intp)
    x = train[:, 1:].astype(np.float64)
    i = np.arange(x.shape[0])

    # NOTE(review): the shuffled index `i` is never applied to `x` / `y` in
    # the visible code, so the training order is unchanged -- confirm intent.
    np.random.shuffle(i)

    x_test = test[:, 1:].astype(np.float64)
    y_test = test[:, 0].astype(np.intp)

    f = ShapeletForestClassifier(n_shapelets=1,
                                 metric="scaled_dtw",
                                 metric_params={"r": 0.1})
    # NOTE(review): the forest is fitted once before the timer starts and
    # again inside the timed region; presumably the first call is a warm-up
    # -- confirm, otherwise it only doubles the training time.
    f.fit(x, y)
    c = time.time()
    f.fit(x, y)
    print(f.classes_)
    print("acc:", f.score(x_test, y_test))
    # Elapsed time in milliseconds. Scale before rounding: rounding the
    # seconds first would discard all sub-second precision.
    print(round((time.time() - c) * 1000))