Пример #1
0
def test_constrained_paths():
    """Check kNN time-series classifiers under various metrics/constraints."""
    n_samples, length, n_features = 15, 10, 3
    rng = np.random.RandomState(0)
    X = rng.randn(n_samples, length, n_features)
    y = rng.randint(low=0, high=3, size=n_samples)

    model_euc = KNeighborsTimeSeriesClassifier(n_neighbors=3,
                                               metric="euclidean")
    y_pred_euc = model_euc.fit(X, y).predict(X)

    # DTW restricted to a Sakoe-Chiba band of radius 0 must agree with
    # plain Euclidean kNN.
    sakoe_params = {"global_constraint": "sakoe_chiba",
                    "sakoe_chiba_radius": 0}
    model_dtw_sakoe = KNeighborsTimeSeriesClassifier(
        n_neighbors=3, metric="dtw", metric_params=sakoe_params)
    y_pred_sakoe = model_dtw_sakoe.fit(X, y).predict(X)
    np.testing.assert_equal(y_pred_euc, y_pred_sakoe)

    # soft-DTW with a near-zero gamma must agree with exact DTW.
    model_softdtw = KNeighborsTimeSeriesClassifier(
        n_neighbors=3, metric="softdtw", metric_params={"gamma": 1e-6})
    y_pred_softdtw = model_softdtw.fit(X, y).predict(X)

    model_dtw = KNeighborsTimeSeriesClassifier(n_neighbors=3, metric="dtw")
    y_pred_dtw = model_dtw.fit(X, y).predict(X)

    np.testing.assert_equal(y_pred_dtw, y_pred_softdtw)

    # CTW: smoke test only — just make sure fit/predict run.
    model_ctw = KNeighborsTimeSeriesClassifier(n_neighbors=3, metric="ctw")
    model_ctw.fit(X, y).predict(X)

    sax_params = {"alphabet_size_avg": 6, "n_segments": 10}
    model_sax = KNeighborsTimeSeriesClassifier(n_neighbors=3,
                                               metric="sax",
                                               metric_params=sax_params)
    model_sax.fit(X, y)

    # The MINDIST of SAX is a lower bound of the euclidean distance
    euc_dist, _ = model_euc.kneighbors(X, n_neighbors=5)
    sax_dist, _ = model_sax.kneighbors(X, n_neighbors=5)

    # First column holds each sample's distance to itself (zero for both
    # metrics), so compare the remaining columns only.
    np.testing.assert_array_less(sax_dist[:, 1:], euc_dist[:, 1:])
Пример #2
0
class kNNClassifier:
    """Two-stage kNN time-series classifier.

    Fits a fast Euclidean kNN model over the full training set.  When
    ``mac_neighbours`` is set, ``predict`` refines each query: the
    query's ``mac_neighbours`` Euclidean neighbours are selected as
    candidates, a DTW kNN model is fitted on those candidates only, and
    the final prediction comes from that DTW model.

    Parameters
    ----------
    n_neighbours : int, default=5
        Number of neighbours used for classification.
    mac_neighbours : int or None, default=None
        Number of Euclidean candidates to pre-select before the DTW
        refinement in ``predict``.  ``None`` disables the refinement.
    weights : str, default="uniform"
        Weight function forwarded to the underlying classifier.
    metric_params : dict or None, default=None
        Extra parameters forwarded to the DTW metric.  ``None`` is
        treated as an empty dict.
    n_jobs : int, default=-1
        Number of parallel jobs for the underlying classifier.
    """

    def __init__(self,
                 n_neighbours=5,
                 mac_neighbours=None,
                 weights="uniform",
                 metric_params=None,
                 n_jobs=-1):
        self.n_neighbours = n_neighbours
        self.mac_neighbours = mac_neighbours
        self.weights = weights
        # Fix for the mutable-default-argument pitfall: the original
        # signature used ``metric_params={}``, a single dict object
        # shared by every instance created with the default.
        self.metric_params = {} if metric_params is None else metric_params
        self.n_jobs = n_jobs

    def get_params(self, deep=True):
        """Return constructor parameters as a dict (sklearn-style)."""
        return {
            "n_neighbours": self.n_neighbours,
            "mac_neighbours": self.mac_neighbours,
            "weights": self.weights,
            "metric_params": self.metric_params,
            "n_jobs": self.n_jobs
        }

    def set_params(self, **parameters):
        """Set parameters by keyword and return self (sklearn-style)."""
        for parameter, value in parameters.items():
            setattr(self, parameter, value)
        return self

    def fit(self, X_train, y_train):
        """Store the training data and fit the Euclidean kNN model."""
        self.X_train = X_train
        self.y_train = y_train

        self.model = KNeighborsTimeSeriesClassifier(
            n_neighbors=self.n_neighbours,
            metric="euclidean",
            weights=self.weights,
            n_jobs=self.n_jobs).fit(self.X_train, self.y_train)
        return self

    def predict(self, X_test):
        """Predict labels for ``X_test``.

        Uses the Euclidean model directly when ``mac_neighbours`` is
        None; otherwise applies the per-query DTW refinement described
        on the class.
        """
        if self.mac_neighbours is None:
            return self.model.predict(X_test)

        y_hat = []
        k_neighbors = self.model.kneighbors(
            X_test, n_neighbors=self.mac_neighbours, return_distance=False)
        for idx, k in enumerate(k_neighbors):
            # Keep the refinement model local.  The original assigned it
            # to ``self.model``, clobbering the fitted Euclidean model
            # and breaking every subsequent call to ``predict``.
            dtw_model = KNeighborsTimeSeriesClassifier(
                n_neighbors=self.n_neighbours,
                metric="dtw",
                weights=self.weights,
                n_jobs=self.n_jobs,
                metric_params=self.metric_params).fit(
                    self.X_train[k], self.y_train[k])
            y_hat.append(dtw_model.predict(X_test[idx]))
        return y_hat
Пример #3
0
class AnomalyDetection(ClassifierMixin, BaseEstimator):
    """
    Anomaly detection with 1-NN and automatic calculation of optimal threshold.

    Normal samples (label 0) are compressed into k-means centroids; the
    distance of a sample to its nearest centroid serves as the anomaly
    score, and the decision threshold is chosen on the training data via
    the Youden index of the ROC curve.
    """
    def __init__(self, n_clusters=200):
        # 1-NN over the centroids of the normal class; its kneighbors
        # distance is the anomaly score.
        self.knn = KNeighborsTimeSeriesClassifier(n_neighbors=1,
                                                  weights='uniform',
                                                  metric='euclidean',
                                                  n_jobs=-1)
        # Decision threshold; learned in fit().
        self.d = None
        self.n_clusters = n_clusters

    def fit(self, X, y):
        """
        Fit the algorithm according to the given training data.
        Parameters
        ----------
        X : array-like of shape (n_samples, n_features, n_channels)
            Training samples.
        y : array-like of shape (n_samples,)
            True labels for X.
        Returns
        -------
        self: object
            Fitted model
        """
        # Fit anomaly detection knn over k-means centroids of the normal
        # (y == 0) samples only.
        X_good = X[np.where(y == 0)]
        X_bad = X[np.where(y != 0)]
        km = TimeSeriesKMeans(n_clusters=self.n_clusters,
                              metric="euclidean",
                              max_iter=100,
                              random_state=0,
                              n_jobs=-1).fit(X_good)
        self.knn.fit(km.cluster_centers_, np.zeros((self.n_clusters, )))

        # Calculate distances to all samples in good and bad
        d_good, _ = self.knn.kneighbors(X_good)
        d_bad, _ = self.knn.kneighbors(X_bad)

        # Calculate ROC.  kneighbors returns (n, 1) columns; flatten
        # them so roc_curve receives a proper 1-D score vector (the
        # original passed a column vector and relied on sklearn's
        # implicit conversion, which emits a DataConversionWarning).
        y_true = np.hstack(
            (np.zeros(X_good.shape[0]), np.ones(X_bad.shape[0])))
        y_score = np.concatenate((d_good.ravel(), d_bad.ravel()))
        fpr, tpr, thresholds = roc_curve(y_true, y_score, pos_label=1)

        # Determine d by Youden index (threshold maximising tpr - fpr)
        self.d = thresholds[np.argmax(tpr - fpr)]
        return self

    def predict(self, X):
        """
        Perform a classification on samples in X.
        Parameters
        ----------
        X : array-like of shape (n_samples, n_features, n_channels)
            Test samples.
        Returns
        -------
        y_pred: array, shape (n_samples,)
            Predictions
        """
        # Flatten the (n, 1) distance column with ravel so the result is
        # always 1-D of length n_samples; the original used np.squeeze,
        # which collapses a single-sample input to a 0-d scalar.
        dist = self.knn.kneighbors(X)[0].ravel()
        y_pred = np.where(dist < self.d, 0, 1)
        return y_pred