def test_paired_dtw_distance(r, expected):
    """Compare paired DTW distances on GunPoint with reference values.

    Parameters
    ----------
    r : object
        Forwarded to the metric as ``metric_params={"r": r}``.
    expected : array-like
        Reference distances the computed result must (almost) equal.
    """
    x, _ = load_dataset("GunPoint", repository="wildboar/ucr-tiny")
    first, second = x[0:3], x[30:33]
    actual = paired_distance(
        first, second, metric="dtw", metric_params={"r": r}
    )
    assert_almost_equal(actual, expected)
def test_extra_tree_regressor():
    """Fit an extra shapelet tree regressor and check pinned values.

    The labels of GunPoint are cast to float and used as the regression
    target. Two thresholds of the fitted tree and the predictions on the
    training data are compared with reference values.
    """
    x, y = load_dataset("GunPoint", repository="wildboar/ucr-tiny")
    target = y.astype(float)

    regressor = ExtraShapeletTreeRegressor(criterion="mse", random_state=123)
    regressor.fit(x, target)

    # Node index -> threshold expected at that node.
    pinned_thresholds = {
        0: 2.052192203219023,
        6: 0.8963769152751806,
    }
    for node, value in pinned_thresholds.items():
        assert_almost_equal(regressor.tree_.threshold[node], value)

    assert_almost_equal(regressor.predict(x), target)
def test_extra_tree_classifier(criterion, expected_left, expected_right, threshold):
    """Fit an extra shapelet tree classifier and check its structure.

    Parameters
    ----------
    criterion : object
        Passed to ``ExtraShapeletTreeClassifier(criterion=...)``.
    expected_left, expected_right : array-like
        Reference values for ``tree_.left`` and ``tree_.right``.
    threshold : array-like
        Reference thresholds for the nodes where ``tree_.left > 0``.
    """
    x, y = load_dataset("GunPoint", repository="wildboar/ucr-tiny")
    clf = ExtraShapeletTreeClassifier(criterion=criterion, random_state=123)
    clf.fit(x, y)

    n_correct = (clf.predict(x) == y).sum()
    assert n_correct == 191

    tree = clf.tree_
    assert_equal(tree.left, expected_left)
    assert_equal(tree.right, expected_right)

    # The same nodes must have a positive left and a positive right entry.
    has_left = tree.left > 0
    has_right = tree.right > 0
    assert_equal(has_left, has_right)
    assert_almost_equal(tree.threshold[has_left], threshold)
def test(dataset, f, random_state=None):
    """Fit ``f`` on a train split of ``dataset`` and print the test RMSE.

    Parameters
    ----------
    dataset : str
        Name of the dataset to load from the ``wildboar/tsereg`` repository.
    f : estimator
        Object exposing ``fit(x, y)`` and ``predict(x)``.
    random_state : object, optional
        Forwarded to ``train_test_split`` to control the split.
    """
    x, y = load_dataset(
        dataset,
        repository="wildboar/tsereg",
        merge_train_test=True,
        preprocess=None,
    )
    x_train, x_test, y_train, y_test = train_test_split(
        x, y, random_state=random_state
    )
    f.fit(x_train, y_train)

    # ``mean_squared_error(..., squared=False)`` was deprecated in
    # scikit-learn 1.4 and removed in 1.6. Taking the square root explicitly
    # works on every version.
    # NOTE(review): identical to the old result for a single target; for
    # multi-output targets the old flag averaged per-output RMSEs instead --
    # confirm the targets used here are one-dimensional.
    rmse = mean_squared_error(y_test, f.predict(x_test)) ** 0.5
    print("%.6f" % rmse)
def test_paired_subsequence_distance(
    metric, metric_params, expected_min_dist, expected_min_ind
):
    """Check paired subsequence distances and match indices on GunPoint.

    Parameters
    ----------
    metric, metric_params : object
        Forwarded unchanged to ``paired_subsequence_distance``.
    expected_min_dist : array-like
        Reference minimum distances.
    expected_min_ind : array-like
        Reference indices returned because ``return_index=True``.
    """
    x, _ = load_dataset("GunPoint", repository="wildboar/ucr-tiny")

    # First 20 time steps of three samples act as the subsequences.
    subsequences = x[[2, 3, 8], 0:20]
    series = x[40:43]

    result = paired_subsequence_distance(
        subsequences,
        series,
        metric=metric,
        metric_params=metric_params,
        return_index=True,
    )
    min_dist, min_ind = result

    assert_almost_equal(min_dist, expected_min_dist)
    assert_equal(min_ind, expected_min_ind)
import matplotlib.pylab as plt
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline

from wildboar.datasets import load_dataset
from wildboar.ensemble import IsolationShapeletForest, ShapeletForestEmbedding

# One seed shared by the split and every estimator below.
random_state = 1234

x, y = load_dataset("CBF", repository="wildboar/outlier:easy")
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=random_state,
    stratify=y,
)

# The same metric is used for the embedding and the isolation forest.
metric = "euclidean"

# Dense shapelet-forest embedding reduced to two PCA components.
embedding = make_pipeline(
    ShapeletForestEmbedding(
        metric=metric,
        random_state=random_state,
        sparse_output=False,
    ),
    PCA(n_components=2, random_state=random_state),
)

isf = IsolationShapeletForest(
    contamination=0.05,
    metric=metric,
    random_state=random_state,
    n_jobs=-1,
)

# Both models are fitted on the training split only; the test split is
# then projected with the fitted embedding.
isf.fit(x_train)
embedding.fit(x_train)
x_embedding = embedding.transform(x_test)
from sklearn.linear_model import RidgeClassifierCV
from sklearn.pipeline import make_pipeline

from wildboar.datasets import load_dataset
from wildboar.embed import RandomShapeletEmbedding

# Load GunPoint with its train and test parts kept separate.
x_train, x_test, y_train, y_test = load_dataset(
    "GunPoint",
    merge_train_test=False,
)

# Random shapelet features followed by a cross-validated ridge classifier.
pipe = make_pipeline(
    RandomShapeletEmbedding(metric="scaled_euclidean"),
    RidgeClassifierCV(),
)
pipe.fit(x_train, y_train)

# Print the pipeline score on the test part.
print(pipe.score(x_test, y_test))
import matplotlib.pylab as plt from sklearn.model_selection import train_test_split from wildboar.datasets import load_dataset from wildboar.ensemble import ShapeletForestClassifier from wildboar.explain import IntervalImportance x, y = load_dataset("LargeKitchenAppliances") x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=123) f = ShapeletForestClassifier( n_shapelets=1, n_estimators=100, n_jobs=-1, metric="scaled_euclidean", random_state=123, ) f.fit(x_train, y_train) i = IntervalImportance( n_interval=72, scoring="accuracy", domain="frequency", verbose=True, random_state=123, ) i.fit(f, x_test, y_test) ax = i.plot(
def test_shapelet_forest_classifier():
    """Check the structure of every tree in a small shapelet forest.

    A forest with ten estimators is fitted on the GunPoint training part.
    For each estimator, the ``left`` and ``right`` arrays of its tree and
    the thresholds of the nodes selected by ``left > 0`` and by
    ``right > 0`` are compared with reference values.
    """
    x_train, _, y_train, _ = load_dataset(
        "GunPoint", repository="wildboar/ucr-tiny", merge_train_test=False
    )
    clf = ShapeletForestClassifier(n_estimators=10, n_shapelets=10, random_state=1)
    clf.fit(x_train, y_train)

    # One entry per estimator: expected ``tree_.left``.
    expected_left = [
        [1, -1, 3, 4, 5, -1, -1, 8, -1, 10, -1, -1, -1],
        [1, -1, 3, 4, -1, 6, -1, 8, -1, 10, -1, -1, -1],
        [1, 2, 3, 4, -1, -1, 7, -1, -1, 10, -1, 12, -1, -1, -1],
        [1, 2, 3, 4, -1, -1, -1, 8, -1, 10, -1, 12, -1, -1, -1],
        [1, 2, 3, -1, 5, -1, 7, -1, 9, -1, 11, -1, -1, -1, 15, -1, 17, -1, -1],
        [1, 2, -1, 4, 5, -1, 7, -1, -1, 10, -1, -1, 13, 14, -1, -1, -1],
        [1, 2, 3, 4, -1, -1, -1, 8, 9, -1, -1, 12, 13, -1, -1, -1, -1],
        [1, 2, -1, 4, 5, 6, 7, -1, -1, -1, -1, 12, -1, -1, -1],
        [1, 2, 3, -1, 5, -1, -1, 8, 9, 10, -1, 12, -1, -1, -1, -1, -1],
        [1, 2, -1, 4, -1, 6, -1, 8, -1, 10, -1, 12, -1, -1, -1],
    ]

    # One entry per estimator: expected ``tree_.right``.
    expected_right = [
        [2, -1, 12, 7, 6, -1, -1, 9, -1, 11, -1, -1, -1],
        [2, -1, 12, 5, -1, 7, -1, 9, -1, 11, -1, -1, -1],
        [14, 9, 6, 5, -1, -1, 8, -1, -1, 11, -1, 13, -1, -1, -1],
        [14, 7, 6, 5, -1, -1, -1, 9, -1, 11, -1, 13, -1, -1, -1],
        [14, 13, 4, -1, 6, -1, 8, -1, 10, -1, 12, -1, -1, -1, 16, -1, 18, -1, -1],
        [12, 3, -1, 9, 6, -1, 8, -1, -1, 11, -1, -1, 16, 15, -1, -1, -1],
        [16, 7, 6, 5, -1, -1, -1, 11, 10, -1, -1, 15, 14, -1, -1, -1, -1],
        [14, 3, -1, 11, 10, 9, 8, -1, -1, -1, -1, 13, -1, -1, -1],
        [16, 7, 4, -1, 6, -1, -1, 15, 14, 11, -1, 13, -1, -1, -1, -1, -1],
        [14, 3, -1, 5, -1, 7, -1, 9, -1, 11, -1, 13, -1, -1, -1],
    ]

    # One entry per estimator. The reference thresholds are the same for
    # the ``left > 0`` and the ``right > 0`` selection, so they are stored
    # once and checked against both.
    expected_thresholds = [
        [
            3.728410228070656,
            11.127575591141072,
            7.383224794807461,
            7.109350684315213,
            0.9248183559076002,
            6.08675185469423,
        ],
        [
            2.468504005311855,
            7.912505524900922,
            1.0551252327034113,
            0.8574299925766751,
            0.5760307808209804,
            0.009237440363224308,
        ],
        [
            3.909569808800988,
            2.821010442496668,
            1.8694668182965288,
            0.034583372931197384,
            0.8137102058624538,
            0.7560554810866997,
            2.713102595233928,
        ],
        [
            5.391042553752862,
            4.420547070721347,
            2.2716225008196576,
            0.6679258993537478,
            1.5471177855226528,
            1.2706259403508802,
            6.379381672446367,
        ],
        [
            2.784221806516613,
            3.9613021926565697,
            0.43050821107331483,
            1.3603965501478146,
            1.9817847740610532,
            0.557171910946499,
            0.023161212907754903,
            3.2040403820972045,
            0.25123702588573155,
        ],
        [
            9.06314095909644,
            0.9301861459984877,
            1.2749535932250209,
            0.6602701901531287,
            0.3105779260645574,
            3.199344210068309,
            1.7444498163002922,
            0.9679068532147111,
        ],
        [
            10.684770463276237,
            1.0443634502866903,
            2.657944200018761,
            0.31997645008775166,
            8.506009151805937,
            2.5790890876760417,
            2.444351040739898,
            0.8797498982567451,
        ],
        [
            8.903669489275785,
            2.558013265746756,
            1.9352062567009694,
            0.6160338380839283,
            1.1133147922166846,
            2.6673841033247827,
            0.6693157414483296,
        ],
        [
            2.9771351955856753,
            3.4048368843307957,
            2.847751510400112,
            1.2655496884627422,
            4.410184513977114,
            2.3116642536119203,
            0.5858765536466852,
            0.7586458184224343,
        ],
        [
            6.260659343273105,
            0.05120063347084325,
            0.678745571123132,
            5.913261089713139,
            0.25431501853894734,
            0.27996560751446015,
            0.7309024510514174,
        ],
    ]

    for estimator, left, right, threshold in zip(
        clf.estimators_, expected_left, expected_right, expected_thresholds
    ):
        assert_equal(left, estimator.tree_.left)
        assert_equal(right, estimator.tree_.right)
        assert_almost_equal(
            threshold, estimator.tree_.threshold[estimator.tree_.left > 0]
        )
        assert_almost_equal(
            threshold, estimator.tree_.threshold[estimator.tree_.right > 0]
        )
def test_decision_path():
    """Check ``decision_path`` of a shapelet tree against a reference.

    The tree is fitted on the test part of GunPoint and the decision path
    is computed for the training part. The result must have boolean dtype
    and, once densified, equal a 50 x 35 reference matrix.
    """
    x_train, x_test, y_train, y_test = load_dataset(
        "GunPoint", repository="wildboar/ucr-tiny", merge_train_test=False
    )
    f = ShapeletTreeClassifier(random_state=123)
    f.fit(x_test, y_test)
    actual_decision_path = f.decision_path(x_train)

    # Number of columns of the reference matrix.
    n_columns = 35

    # For every sample (row), the column indices that are set in the
    # reference matrix; all remaining entries of the row are False.
    active_columns = [
        (0, 1, 3, 21, 23, 25),
        (0, 1, 3, 21, 23),
        (0, 1, 3, 4, 6),
        (0, 1, 3, 4, 6),
        (0, 1, 3, 21, 23),
        (0, 1, 3, 21, 23),
        (0, 30),
        (0, 30),
        (0, 1, 3, 4, 6, 8),
        (0, 1, 3, 4),
        (0, 1, 3, 4, 6, 8, 9, 11),
        (0, 1, 3, 4),
        (0, 1, 3, 4),
        (0, 1, 3, 4),
        (0, 1, 3, 21, 23),
        (0, 1, 3, 4),
        (0, 1, 3, 4, 6, 8, 9, 11, 13, 14),
        (0, 1, 3, 21, 23, 25),
        (0, 1, 3, 4),
        (0, 30, 31),
        (0, 1, 3, 4, 6, 8, 9, 11),
        (0, 1, 3, 4),
        (0, 1, 3, 4),
        (0, 30),
        (0, 1),
        (0, 1, 3, 4, 6, 8),
        (0, 1, 3, 4),
        (0, 1, 3, 4),
        (0, 1, 3, 4, 6, 8),
        (0, 30, 31),
        (0, 1, 3, 4, 6, 8, 9, 11, 13),
        (0, 30),
        (0, 1, 3, 4, 6, 8),
        (0, 1, 3, 4, 6),
        (0, 1, 3, 21, 23),
        (0, 1),
        (0, 30, 31),
        (0, 1, 3, 21, 23),
        (0, 30),
        (0, 30, 31),
        (0, 1, 3, 4, 6, 8, 9, 11, 13, 14),
        (0, 1, 3, 4),
        (0, 1, 3, 4),
        (0, 1, 3, 4),
        (0, 30, 31),
        (0, 1, 3, 21, 23),
        (0, 1, 3, 21, 23, 25),
        (0, 30),
        (0, 1, 3, 21, 23, 25),
        (0, 30),
    ]

    expected_decision_path = np.zeros((len(active_columns), n_columns), dtype=bool)
    for sample, columns in enumerate(active_columns):
        expected_decision_path[sample, list(columns)] = True

    assert actual_decision_path.dtype == np.bool_
    assert_array_equal(actual_decision_path.toarray(), expected_decision_path)
import matplotlib.pylab as plt import numpy as np from wildboar.datasets import load_dataset from wildboar.ensemble import ShapeletForestClassifier from wildboar.explain.counterfactual import counterfactuals x_train, x_test, y_train, y_test = load_dataset("TwoLeadECG", repository="wildboar/ucr", merge_train_test=False) # x_train = x_train.repeat(2, axis=0).reshape(x_train.shape[0], 2, -1) # print(x_train[0]) # x_test = x_test.repeat(2, axis=0).reshape(x_test.shape[0], 2, -1) clf = ShapeletForestClassifier(metric="euclidean", random_state=1, n_jobs=-1, n_estimators=100) clf.fit(x_train, y_train) print(clf.score(x_test, y_test)) y_pred = clf.predict(x_test) class_ = clf.classes_[1] print("Class: %s" % class_) print("Pred: %r" % y_pred) x_test = x_test[y_pred != class_][:10] y_test = y_test[y_pred != class_][:10] x_counterfactual, success, score = counterfactuals(clf, x_test, class_,
import numpy as np
from sklearn.model_selection import cross_validate

from wildboar.datasets import load_dataset
from wildboar.ensemble import ExtraShapeletTreesClassifier, ShapeletForestClassifier

# Seed shared by both ensembles.
random_state = 1234

x, y = load_dataset("Beef")

# Display name -> estimator to cross-validate.
classifiers = {
    "Shapelet forest": ShapeletForestClassifier(
        n_shapelets=10,
        metric="scaled_euclidean",
        n_jobs=-1,
        random_state=random_state,
    ),
    "Extra Shapelet Trees": ExtraShapeletTreesClassifier(
        metric="scaled_euclidean",
        n_jobs=-1,
        random_state=random_state,
    ),
}

# Report the mean fit time and mean "roc_auc_ovo" score for each estimator.
for name, clf in classifiers.items():
    score = cross_validate(clf, x, y, scoring="roc_auc_ovo", n_jobs=1)
    print("Classifier: %s" % name)
    print(" - fit-time: %.2f" % np.mean(score["fit_time"]))
    print(" - test-score: %.2f" % np.mean(score["test_score"]))
import matplotlib.pyplot as plt
import numpy as np
from sklearn.decomposition import PCA
from sklearn.pipeline import make_pipeline

from wildboar.datasets import load_dataset
from wildboar.ensemble import ShapeletForestEmbedding

# Seed shared by the embedding and the PCA step.
random_state = 1234

x, y = load_dataset("CBF")

# Dense shapelet-forest embedding followed by a two-component PCA.
pca = make_pipeline(
    ShapeletForestEmbedding(
        metric="scaled_euclidean",
        sparse_output=False,
        max_depth=5,
        random_state=random_state,
    ),
    PCA(n_components=2, random_state=random_state),
)
p = pca.fit_transform(x)

# Explained variance ratio of the second pipeline step (the PCA).
var = pca.steps[1][1].explained_variance_ratio_

# One colour per distinct label; ``index`` maps each sample to its label.
labels, index = np.unique(y, return_inverse=True)
colors = plt.cm.rainbow(np.linspace(0, 1, len(labels)))

plt.scatter(p[:, 0], p[:, 1], color=colors[index, :])
plt.xlabel("Component 1 (%.2f variance explained)" % var[0])
plt.ylabel("Component 2 (%.2f variance explained)" % var[1])
plt.savefig("fig/sfe_pca.png")
import matplotlib.pylab as plt

from wildboar.datasets import load_dataset
from wildboar.distance import matrix_profile

x, y = load_dataset("TwoLeadECG")

# Concatenate the first 20 samples into one long one-dimensional series.
x = x[:20].reshape(-1)
print(x.shape)

mp = matrix_profile(x.reshape(-1), window=20, exclude=0.2)

fig, ax = plt.subplots(nrows=2, sharex=True)

# Upper panel: the concatenated series.
ax[0].plot(x, color="red", lw=0.5)
ax[0].set_title("Time series")

# Lower panel: its matrix profile.
ax[1].plot(mp, color="blue", lw=0.5)
ax[1].set_title("Matrix profile")

# Both panels span the full length of the series.
ax[0].set_xlim(0, x.shape[-1])
ax[1].set_xlim(0, x.shape[-1])

plt.tight_layout()
plt.savefig("../fig/matrix_profile.png")
from wildboar.datasets import (
    list_bundles,
    list_collections,
    list_datasets,
    list_repositories,
    load_dataset,
    load_datasets,
)

# Dataset names for two repository specifications.
print(list_datasets(repository="wildboar/ucr:no-missing"))
print(list_datasets(repository="wildboar/outlier:1.0:hard"))

# Known repositories and the bundles of the "wildboar" repository.
print(list_repositories())
print(list_bundles("wildboar"))

# Iterate over the datasets matching all three filter expressions and
# print their names.
for dataset, (x, y) in load_datasets(
    repository="wildboar/ucr-tiny",
    filter=["n_timestep>10", "n_samples<=200", "n_labels<=3"],
):
    print(dataset)

# Names of the datasets in the "bake-off" collection.
for dataset in list_datasets(repository="wildboar/ucr", collection="bake-off"):
    print(dataset)

print(list_collections("wildboar/ucr"))

load_dataset("GunPoint", repository="wildboar/ucr-tiny")
import numpy as np from sklearn.model_selection import train_test_split from wildboar.datasets import load_dataset from wildboar.linear_model import RandomShapeletClassifier, RocketClassifier x, y = load_dataset("ItalyPowerDemand", merge_train_test=True, preprocess="standardize") x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=123) print(np.mean(x_train, axis=1)) f = RocketClassifier( n_kernels=10000, sampling="normal", sampling_params={ "mean": 0, "scale": 1 }, n_jobs=16, random_state=123, alphas=np.logspace(-3, 3, 10), normalize=True, ) f.fit(x_train, y_train) print(f.score(x_test, y_test)) f = RandomShapeletClassifier( n_shapelets=10000, n_jobs=16, metric="scaled_euclidean",
import matplotlib.pyplot as plt
import numpy as np
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import make_pipeline

from wildboar.datasets import load_dataset
from wildboar.ensemble import ShapeletForestClassifier, ShapeletForestEmbedding

# Seed shared by the split and the seeded estimators.
random_state = 1234

x, y = load_dataset("Car")
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=random_state,
)

# Dense shapelet-forest embedding followed by a two-component PCA, fitted
# on the training split and applied to the test split.
f_embedding = make_pipeline(
    ShapeletForestEmbedding(sparse_output=False, random_state=random_state),
    PCA(n_components=2, random_state=random_state),
)
f_embedding.fit(x_train)
x_embedding = f_embedding.transform(x_test)

# (display name, estimator) pairs.
classifiers = [
    ("Shapelet forest", ShapeletForestClassifier(random_state=random_state)),
    ("Nearest neighbors", KNeighborsClassifier()),
]

# Distinct labels present in the full dataset.
classes = np.unique(y)
import numpy as np
from sklearn.neighbors import KNeighborsClassifier

from wildboar.datasets import load_dataset
from wildboar.explain.counterfactual import counterfactuals

x_train, x_test, y_train, y_test = load_dataset(
    "GunPoint",
    repository="wildboar/ucr",
    merge_train_test=False,
)

clf = KNeighborsClassifier(n_neighbors=1, metric="euclidean")
clf.fit(x_train, y_train)

# The reversed test labels are passed as the third argument of
# ``counterfactuals``.
x_counter, success, scores = counterfactuals(
    clf,
    x_test,
    y_test[::-1],
    random_state=123,
    scoring="euclidean",
)

print(scores)
print(np.sum(success))

# Fraction of counterfactuals predicted as the reversed label.
print(np.sum(clf.predict(x_counter) == y_test[::-1]) / y_test.shape[0])
n_neighbors=1, metric="euclidean", ), "Shapelet forest": ShapeletForestClassifier( n_shapelets=10, metric="scaled_euclidean", random_state=random_state, n_jobs=-1, ), "Extra shapelet trees": ExtraShapeletTreesClassifier( metric="scaled_euclidean", n_jobs=-1, random_state=random_state, ), } repository = "wildboar/ucr-tiny" datasets = list_datasets(repository) df = pd.DataFrame(columns=classifiers.keys(), index=datasets, dtype=float) for dataset in datasets: print(dataset) x, y = load_dataset(dataset, repository=repository) for clf_name, clf in classifiers.items(): print(" ", clf_name) score = cross_validate(clf, x, y, scoring="roc_auc_ovo", n_jobs=1) df.loc[dataset, clf_name] = np.mean(score["test_score"]) df.to_csv("../tab/classification_cmp.csv", float_format="%.3f")
import numpy as np from sklearn.metrics import roc_auc_score from sklearn.model_selection import train_test_split from wildboar.datasets import load_dataset from wildboar.datasets.outlier import EmmottLabeler from wildboar.ensemble import IsolationShapeletForest x, y = load_dataset("SwedishLeaf", repository="wildboar/ucr", merge_train_test=True) labeler = EmmottLabeler(n_outliers=0.05, difficulty=1, variation="tight", random_state=5) x, y = labeler.fit_transform(x, y) print(np.unique(y, return_counts=True)) x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=10, test_size=0.2, stratify=y) f = IsolationShapeletForest( random_state=10, n_estimators=100, contamination=0.1, metric="scaled_euclidean", n_jobs=-1, min_shapelet_size=0, max_shapelet_size=1,
import matplotlib.pylab as plt
import numpy as np

from wildboar.datasets import load_dataset
from wildboar.embed import IntervalEmbedding

x, y = load_dataset("GunPoint")

fixed = IntervalEmbedding(n_interval=30, summarizer="auto", intervals="fixed")
x_t = fixed.fit_transform(x)

# Each bar is labelled with the ``start`` value of its feature.
labels = ["%s" % start for (dim, (start, length, _)) in fixed.embedding_.features]
n_features = x_t.shape[1]

fig, ax = plt.subplots(nrows=4, figsize=(5.5, 7))

# Top panel: the first sample.
ax[0].plot(x[0])
ax[0].title.set_text("Time series")

colors = plt.cm.rainbow(np.linspace(0, 1, 3))
titles = ["Mean", "Variance", "Slope"]

# One panel per title, showing every third column of the first
# transformed sample starting at offset ``i``.
for i, title in enumerate(titles):
    panel = ax[i + 1]
    panel.bar(labels, x_t[0, i:n_features:3], color=colors[i, :])
    plt.setp(panel.get_xticklabels(), rotation="vertical", ha="center")
    panel.title.set_text(title)

plt.tight_layout()
plt.savefig("../fig/interval.png")
def test_apply():
    """Check ``apply`` of a shapelet tree against reference indices.

    The tree is fitted on the test part of GunPoint and applied to the
    training part. The result must have dtype ``np.intp`` and equal the
    50 reference values below.
    """
    x_train, x_test, y_train, y_test = load_dataset(
        "GunPoint", repository="wildboar/ucr-tiny", merge_train_test=False
    )
    clf = ShapeletTreeClassifier(random_state=123)
    clf.fit(x_test, y_test)
    actual_apply = clf.apply(x_train)

    # Reference output, one value per training sample (ten per row).
    reference = [
        29, 24, 7, 7, 24, 24, 34, 34, 20, 5,
        12, 5, 5, 5, 24, 5, 15, 29, 5, 32,
        12, 5, 5, 34, 2, 20, 5, 5, 20, 32,
        19, 34, 20, 7, 24, 2, 33, 24, 34, 32,
        15, 5, 5, 5, 32, 24, 29, 34, 29, 34,
    ]
    expected_apply = np.array(reference, dtype=int)

    assert actual_apply.dtype == np.intp
    assert_array_equal(actual_apply, expected_apply)