示例#1
0
def test_dunder_mul():
    """Check that ``*`` chains transformers and a classifier into a pipeline.

    Every bracketing of ``t1 * t2 * c`` has to produce a ClassifierPipeline,
    and each pipeline's predictions are compared against the predictions of
    the bare classifier (the two transformers use powers 4 and 0.25).
    """
    seed = 42
    y = _make_classification_y(n_instances=10, random_state=seed)
    X = _make_panel_X(n_instances=10, n_timepoints=20, random_state=seed, y=y)
    X_test = _make_panel_X(n_instances=5, n_timepoints=20, random_state=seed)

    power_four = ExponentTransformer(power=4)
    power_quarter = ExponentTransformer(power=0.25)
    clf = KNeighborsTimeSeriesClassifier()

    # The same chain, bracketed in every possible way.
    pipelines = [
        power_four * (power_quarter * clf),
        (power_four * power_quarter) * clf,
        power_four * power_quarter * clf,
    ]

    for pipe in pipelines:
        assert isinstance(pipe, ClassifierPipeline)

    expected = clf.fit(X, y).predict(X_test)

    for pipe in pipelines:
        _assert_array_almost_equal(expected, pipe.fit(X, y).predict(X_test))
def main():
    """Train a k-nearest-neighbours time series classifier and pickle it.

    All settings are read from the module-level ``args`` object. Column 0 of
    the training data is passed to ``fit`` as the labels and the remaining
    columns as the series. Any exception raised while fitting or saving is
    printed rather than propagated.
    """
    data_source = DataGenerator(
        labeled_data_file=args.labeled_data_file,
        data_util_file=args.data_util_file,
        threshold=args.threshold,
        dt=args.dt,
        L=args.L,
        tmin=args.tmin,
        tmax=args.tmax,
    )
    # Only the training split is used below.
    training_data, _unused_test_data = data_source.get_data(
        ts_nth_element=args.ts_nth_element, training_frac=0.7
    )
    classifier = KNeighborsTimeSeriesClassifier(
        n_neighbors=args.n_neighbors, verbose=1, metric="dtw"
    )
    series = detabularize(pd.DataFrame(training_data[:, 1:]))
    try:
        with parallel_backend('threading', n_jobs=args.n_jobs):
            classifier = classifier.fit(series, training_data[:, 0])
        output_path = '{save_file_name}.pickle'.format(
            save_file_name=args.save_file_name
        )
        with open(output_path, 'wb') as model_file:
            pickle.dump(classifier, model_file, protocol=pickle.HIGHEST_PROTOCOL)
    except Exception as error:
        print(error)
示例#3
0
def test_mul_sklearn_autoadapt():
    """Check that ``*`` accepts an sklearn transformer inside a pipeline.

    A ``StandardScaler`` sits between an ``ExponentTransformer`` and the
    classifier. Every bracketing must produce a ClassifierPipeline, and the
    second and third pipelines must predict the same as the first one.
    """
    seed = 42
    y = _make_classification_y(n_instances=10, random_state=seed)
    X = _make_panel_X(n_instances=10, n_timepoints=20, random_state=seed, y=y)
    X_test = _make_panel_X(n_instances=10, n_timepoints=20, random_state=seed)

    squarer = ExponentTransformer(power=2)
    scaler = StandardScaler()
    clf = KNeighborsTimeSeriesClassifier()

    # The same chain, bracketed in every possible way.
    pipelines = [
        squarer * (scaler * clf),
        (squarer * scaler) * clf,
        squarer * scaler * clf,
    ]

    for pipe in pipelines:
        assert isinstance(pipe, ClassifierPipeline)

    # The first pipeline serves as the reference for the other two.
    expected = pipelines[0].fit(X, y).predict(X_test)

    for pipe in pipelines[1:]:
        _assert_array_almost_equal(expected, pipe.fit(X, y).predict(X_test))
示例#4
0
    def get_test_params(cls, parameter_set="default"):
        """Return constructor parameters for building a test instance.

        Parameters
        ----------
        parameter_set : str, default="default"
            Name of the requested set of test parameters. This implementation
            does not read the value; the same parameters are returned for
            every name.

        Returns
        -------
        params : dict
            Keyword arguments such that `MyClass(**params)` creates a valid
            test instance: two exponent transformers (powers 2 and 0.5) under
            `"transformers"` and a k-nearest-neighbours time series classifier
            under `"classifier"`. The transformers are passed without names.
        """
        # Imported locally, as in the rest of this method's history.
        from sktime.classification.distance_based import KNeighborsTimeSeriesClassifier
        from sktime.transformations.series.exponent import ExponentTransformer

        transformers = [ExponentTransformer(power=power) for power in (2, 0.5)]
        classifier = KNeighborsTimeSeriesClassifier()

        return {"transformers": transformers, "classifier": classifier}
示例#5
0
    def fit(self, X, y):
        """Train the classifier on the given data.

        Parameters
        ----------
        X - pandas dataframe of training data of shape [n_instances,1].
        y - list of class labels of shape [n_instances].

        Returns
        -------
        self : the shapeDTW object

        Raises
        ------
        TypeError : if shape_descriptor_function is not a string.
        """
        # Validate the parameters before touching the data.
        if not isinstance(self.shape_descriptor_function, str):
            # Adjacent literals are used instead of a backslash continuation
            # inside the string, which would embed the source indentation in
            # the message.
            raise TypeError(
                "shape_descriptor_function must be an 'str'. "
                "Found '"
                + type(self.shape_descriptor_function).__name__
                + "' instead."
            )

        X, y = check_X_y(X, y, enforce_univariate=False)

        if self.metric_params is None:
            self.metric_params = {}

        # If the shape descriptor is 'compound',
        # calculate the appropriate weighting_factor
        if self.shape_descriptor_function == "compound":
            self._calculate_weighting_factor_value(X, y)

        # Fit the SlidingWindowSegmenter and keep it for use at predict time
        sw = SlidingWindowSegmenter(self.subsequence_length)
        sw.fit(X)
        self.sw = sw

        # Transform the training data.
        X = self._preprocess(X)

        # Fit the kNN classifier on the transformed data
        self.knn = KNeighborsTimeSeriesClassifier(n_neighbors=self.n_neighbors)
        self.knn.fit(X, y)
        self.classes_ = self.knn.classes_

        return self
示例#6
0
    def fit(self, data):
        """Fit a 5-nearest-neighbour DTW classifier and report its metrics.

        When ``data.kfold > 1`` one model is trained per key of
        ``data.Xy_train`` and only the best one is kept: lowest ``mae`` for
        regression tasks, highest ``balanced_acc_adj`` otherwise. In any other
        case a single model is trained on ``data.Xy_train`` and evaluated on
        ``data.Xy_val``.

        Returns
        -------
        dict with the retained model under ``'model'`` and its evaluation
        output under ``'metrics'``. The model is also stored on ``self.knn``.
        """
        if not data.kfold > 1:
            # Single train/validation split.
            X_train, y_train = data.Xy_train
            X_val, y_val = data.Xy_val
            X_train = from_2d_array_to_nested(X_train)
            X_val = from_2d_array_to_nested(X_val)
            self.knn = KNeighborsTimeSeriesClassifier(n_neighbors=5, distance="dtw", n_jobs=-1)
            self.knn.fit(X_train, y_train)
            scores = weareval.eval_output(self.knn.predict(X_val), y_val, tasktype=data.tasktype)
            return {'model': self.knn, 'metrics': scores}

        # Cross-validation: train and score one model per fold.
        per_fold = {}
        for fold in data.Xy_train.keys():
            (X_train, y_train), (X_val, y_val) = data.Xy_train[fold]
            X_train = from_2d_array_to_nested(X_train)
            X_val = from_2d_array_to_nested(X_val)
            model = KNeighborsTimeSeriesClassifier(n_neighbors=5, distance="dtw", n_jobs=-1)
            model.fit(X_train, y_train)
            scores = weareval.eval_output(model.predict(X_val), y_val, tasktype=data.tasktype)
            if data.tasktype == 'regression':
                headline = scores['mae']
            else:
                headline = scores['balanced_acc_adj']
            per_fold[fold] = {'model': model, 'metric': headline, 'metrics': scores}

        # Retain only the best model: an error is minimised, an accuracy maximised.
        headline_by_fold = {fold: entry['metric'] for fold, entry in per_fold.items()}
        choose = min if data.tasktype == 'regression' else max
        best_fold = choose(headline_by_fold, key=headline_by_fold.get)
        self.knn = per_fold[best_fold]['model']
        return {'model': self.knn, 'metrics': per_fold[best_fold]['metrics']}
示例#7
0
def test_missing_unequal_tag_inference():
    """Check the capability tags a pipeline reports for its composition.

    The unequal-length tag is expected when a PaddingTransformer is in the
    chain and not otherwise. The missing-values tag is expected when an
    Imputer comes first, but not when it follows an ExponentTransformer.
    """
    clf = KNeighborsTimeSeriesClassifier()
    with_padding = (
        ExponentTransformer() * PaddingTransformer() * ExponentTransformer() * clf
    )
    without_padding = ExponentTransformer() * ExponentTransformer() * clf
    imputer_first = Imputer() * ExponentTransformer() * clf
    imputer_second = ExponentTransformer() * Imputer() * clf

    unequal_tag = "capability:unequal_length"
    missing_tag = "capability:missing_values"

    assert with_padding.get_tag(unequal_tag)
    assert not without_padding.get_tag(unequal_tag)
    assert imputer_first.get_tag(missing_tag)
    assert not imputer_second.get_tag(missing_tag)
def main():
    """Classify the ItalyPowerDemand data with a 1-nearest-neighbour model.

    Loads the train and test ARFF files from the working directory, splits
    the ``target`` column off as integer labels, detabularizes the remaining
    columns, fits the classifier and prints its score on the test split
    rounded to four decimals. Intermediate frames are printed along the way.
    """
    # Element 0 of each loadarff result is what gets turned into a DataFrame.
    train_arff = arff.loadarff('ItalyPowerDemand_TRAIN.arff')
    test_arff = arff.loadarff('ItalyPowerDemand_TEST.arff')
    train_frame = pd.DataFrame(train_arff[0])
    test_frame = pd.DataFrame(test_arff[0])

    # Inspect the raw tabular frame.
    print(train_frame.head())
    print('\n Is nested the df above?', is_nested_dataframe(train_frame), '\n')

    # Separate the features from the integer class labels.
    X_train = train_frame.drop('target', axis=1)
    y_train = train_frame['target'].astype(int)
    print(X_train.head(), y_train.head(), '\n')
    X_test = test_frame.drop('target', axis=1)
    y_test = test_frame['target'].astype(int)

    # Detabularize both feature frames.
    X_train_detab = detabularize(X_train)
    X_test_detab = detabularize(X_test)
    print(X_train_detab.head())
    print('Is nested the detabularized df above?',
          is_nested_dataframe(X_train_detab), '\n')

    # The same file loaded through the sktime helper, printed for comparison.
    X, y = load_from_arff_to_dataframe('ItalyPowerDemand_TRAIN.arff')
    print(X_train_detab.head(), X.head(), type(y_train), type(y))

    # Fit the classifier and report its score on the test split.
    knn = KNeighborsTimeSeriesClassifier(n_neighbors=1, metric="dtw")
    knn.fit(X_train_detab, y_train)
    score = round(knn.score(X_test_detab, y_test), 4)
    print('The score of the KNN classifier is:', score)
示例#9
0
def test_stat():
    """Check mean ranks and sign-test output for two benchmarked strategies.

    A k-nearest-neighbours strategy (named "pf") and a one-tree forest
    strategy (named "tsf") are run on the gunpoint training data with a
    single split. The expected mean accuracy ranks are 1 and 2, and every
    inspected sign-test entry is expected to be 1.
    """
    gunpoint = load_gunpoint(split="train", return_X_y=False)
    dataset = RAMDataset(dataset=gunpoint, name="gunpoint")
    task = TSCTask(target="class_val")

    forest = ComposableTimeSeriesForestClassifier(n_estimators=1, random_state=1)
    forest_strategy = TSCStrategy(forest, name="tsf")
    neighbours = KNeighborsTimeSeriesClassifier()
    neighbours_strategy = TSCStrategy(neighbours, name="pf")

    # Results are kept in memory.
    results = RAMResults()
    orchestrator = Orchestrator(
        datasets=[dataset],
        tasks=[task],
        strategies=[neighbours_strategy, forest_strategy],
        cv=SingleSplit(random_state=1),
        results=results,
    )
    orchestrator.fit_predict(save_fitted_strategies=False)

    analyse = Evaluator(results)
    accuracy = PairwiseMetric(func=accuracy_score, name="accuracy")
    _ = analyse.evaluate(metric=accuracy)

    strategy_names = ("pf", "tsf")

    ranks = analyse.rank(ascending=True)
    observed_ranks = [
        ranks.loc[ranks.strategy == name, "accuracy_mean_rank"].item()
        for name in strategy_names
    ]
    expected_ranks = [1, 2]

    _, sign_test_df = analyse.sign_test()
    observed_signs = [
        [sign_test_df[name][0], sign_test_df[name][1]] for name in strategy_names
    ]
    expected_signs = [[1, 1], [1, 1]]

    np.testing.assert_equal([observed_ranks, observed_signs],
                            [expected_ranks, expected_signs])
示例#10
0
def set_classifier(cls, resampleId=None):
    """
    Basic way of creating the classifier to build using the default settings. This
    set up is to help with batch jobs for multiple problems to facilitate easy
    reproducibility. You can set up bespoke classifier in many other ways.

    The lookup is case-insensitive: ``cls`` is lower-cased before matching, so
    every alias compared against it must itself be lower case.

    :param cls: String indicating which classifier you want
    :param resampleId: classifier random seed; passed as ``random_state`` to
        the classifiers constructed with one below

    :return: A classifier.

    :raises Exception: if ``cls`` does not match any known classifier name.

    """
    name = cls.lower()
    # Distance based
    if name in ("pf", "proximityforest"):
        return ProximityForest(random_state=resampleId)
    elif name in ("pt", "proximitytree"):
        return ProximityTree(random_state=resampleId)
    elif name in ("ps", "proximitystump"):
        # The alias used to be spelled "proximityStump", which could never
        # equal the lower-cased name.
        return ProximityStump(random_state=resampleId)
    elif name in ("dtwcv", "kneighborstimeseriesclassifier"):
        return KNeighborsTimeSeriesClassifier(distance="dtwcv")
    elif name in ("dtw", "1nn-dtw"):
        return KNeighborsTimeSeriesClassifier(distance="dtw")
    elif name in ("msm", "1nn-msm"):
        return KNeighborsTimeSeriesClassifier(distance="msm")
    elif name in ("ee", "elasticensemble"):
        return ElasticEnsemble()
    elif name == "shapedtw":
        return ShapeDTW()
    # Dictionary based
    elif name in ("boss", "bossensemble"):
        return BOSSEnsemble(random_state=resampleId)
    elif name in ("cboss", "contractableboss"):
        return ContractableBOSS(random_state=resampleId)
    elif name in ("tde", "temporaldictionaryensemble"):
        return TemporalDictionaryEnsemble(random_state=resampleId)
    elif name == "weasel":
        return WEASEL(random_state=resampleId)
    elif name == "muse":
        return MUSE(random_state=resampleId)
    # Interval based
    elif name in ("rise", "randomintervalspectralforest"):
        return RandomIntervalSpectralForest(random_state=resampleId)
    elif name in ("tsf", "timeseriesforestclassifier"):
        return TimeSeriesForestClassifier(random_state=resampleId)
    elif name in ("cif", "canonicalintervalforest"):
        return CanonicalIntervalForest(random_state=resampleId)
    elif name == "drcif":
        return DrCIF(random_state=resampleId)
    # Shapelet based
    elif name in ("stc", "shapelettransformclassifier"):
        return ShapeletTransformClassifier(
            random_state=resampleId, time_contract_in_mins=1
        )
    elif name in ("mrseql", "mrseqlclassifier"):
        return MrSEQLClassifier(seql_mode="fs", symrep=["sax", "sfa"])
    elif name == "rocket":
        return ROCKETClassifier(random_state=resampleId)
    elif name == "arsenal":
        return Arsenal(random_state=resampleId)
    # Hybrid
    elif name == "catch22":
        return Catch22ForestClassifier(random_state=resampleId)
    elif name == "hivecotev1":
        return HIVECOTEV1(random_state=resampleId)
    else:
        raise Exception("UNKNOWN CLASSIFIER")
示例#11
0
class ShapeDTW(BaseClassifier):
    """ShapeDTW classifier.

    ShapeDTW[1] works by initially extracting a set of subsequences
    describing local neighbourhoods around each data point in a time series.
    These subsequences are then passed into a shape descriptor function that
    transforms these local neighbourhoods into a new representation. This
    new representation is then sent into DTW with 1-NN.

    Parameters
    ----------
    n_neighbours                : int, set k for knn (default = 1).
    subsequence_length          : int, defines the length of the
                                  subsequences (default = 30).

    shape_descriptor_function   : string, defines the function to describe
                                  the set of subsequences
                                  (default = 'raw').


    The possible shape descriptor functions are as follows:

        - 'raw'                 : use the raw subsequence as the
                                  shape descriptor function.
                                - params = None

        - 'paa'                 : use PAA as the shape descriptor function.
                                - params = num_intervals_paa (default=8)

        - 'dwt'                 : use DWT (Discrete Wavelet Transform)
                                  as the shape descriptor function.
                                - params = num_levels_dwt (default=3)

        - 'slope'               : use the gradient of each subsequence
                                  fitted by a total least squares
                                  regression as the shape descriptor
                                  function.
                                - params = num_intervals_slope (default=8)

        - 'derivative'          : use the derivative of each subsequence
                                  as the shape descriptor function.
                                - params = None

        - 'hog1d'               : use a histogram of gradients in one
                                  dimension as the shape descriptor
                                  function.
                                - params = num_intervals_hog1d
                                                    (default=2)
                                         = num_bins_hog1d
                                                    (default=8)
                                         = scaling_factor_hog1d
                                                    (default=0.1)

        - 'compound'            : use a combination of two shape
                                  descriptors simultaneously.
                                - params = weighting_factor
                                          (default=None)
                                           Defines how to scale
                                           values of a shape
                                           descriptor.
                                           If a value is not given,
                                           this value is tuned
                                           by 10-fold cross-validation
                                           on the training data.


    shape_descriptor_functions  : string list, only applicable when the
                                  shape_descriptor_function is
                                  set to 'compound'.
                                  Use a list of shape descriptor
                                  functions at the same time.
                                  (default = ['raw','derivative'])

    metric_params               : dictionary for metric parameters
                                  (default = None). Keys are matched
                                  case-insensitively.

    Notes
    -----
    ..[1] Jiaping Zhao and Laurent Itti, "shapeDTW: Shape Dynamic Time Warping",
        Pattern Recognition, 74, pp 171-184, 2018
        http://www.sciencedirect.com/science/article/pii/S0031320317303710,

    """

    def __init__(
        self,
        n_neighbours=1,
        subsequence_length=30,
        shape_descriptor_function="raw",
        shape_descriptor_functions=["raw",
                                    "derivative"],  # noqa from flake8 B006
        metric_params=None,
    ):
        # Note the spelling difference: the constructor argument is
        # ``n_neighbours`` while the attribute is ``n_neighbors``.
        self.n_neighbors = n_neighbours
        self.subsequence_length = subsequence_length
        self.shape_descriptor_function = shape_descriptor_function
        self.shape_descriptor_functions = shape_descriptor_functions
        self.metric_params = metric_params
        super(ShapeDTW, self).__init__()

    def _fit(self, X, y):
        """Train the classifier.

        Parameters
        ----------
        X - pandas dataframe of training data of shape [n_instances,1].
        y - list of class labels of shape [n_instances].

        Returns
        -------
        self : the shapeDTW object

        Raises
        ------
        TypeError : if shape_descriptor_function is not a string.
        """
        # Perform preprocessing on params.
        if not isinstance(self.shape_descriptor_function, str):
            # Adjacent literals are used instead of a backslash continuation
            # inside the string, which would embed the source indentation in
            # the message.
            raise TypeError(
                "shape_descriptor_function must be an 'str'. "
                "Found '"
                + type(self.shape_descriptor_function).__name__
                + "' instead."
            )

        if self.metric_params is None:
            self.metric_params = {}

        # If the shape descriptor is 'compound',
        # calculate the appropriate weighting_factor
        if self.shape_descriptor_function == "compound":
            self._calculate_weighting_factor_value(X, y)

        # Fit the SlidingWindowSegmenter and keep it for use at predict time
        sw = SlidingWindowSegmenter(self.subsequence_length)
        sw.fit(X)
        self.sw = sw

        # Transform the training data.
        X = self._preprocess(X)

        # Fit the kNN classifier
        self.knn = KNeighborsTimeSeriesClassifier(n_neighbors=self.n_neighbors)
        self.knn.fit(X, y)
        self.classes_ = self.knn.classes_

        return self

    def _calculate_weighting_factor_value(self, X, y):
        """Calculate the appropriate weighting_factor.

        Check for the compound shape descriptor.
        If a value is given, the weighting_factor is set
        as the given value. If not, it is tuned via
        a 10-fold cross-validation on the training data.

        Parameters
        ----------
        X - training data in a dataframe of shape [n_instances,1]
        y - training data classes of shape [n_instances].

        Raises
        ------
        ValueError : if tuning is required and shape_descriptor_functions
                     is None or does not hold exactly two entries.
        """
        # Metric parameter names are matched case-insensitively.
        self.metric_params = {
            k.lower(): v
            for k, v in self.metric_params.items()
        }
        mp = self.metric_params

        # Get the weighting_factor if one is provided
        given_factor = mp.get("weighting_factor")
        if given_factor is not None:
            self.weighting_factor = given_factor
            return

        # Tune it otherwise. Every candidate keeps the other metric
        # parameters, so the factor is tuned against the same descriptor
        # settings the final model uses rather than against the defaults.
        candidates = [0.1, 0.125, (1 / 6), 0.25, 0.5, 1, 2, 4, 6, 8, 10]
        self._param_matrix = {
            "metric_params": [
                {**mp, "weighting_factor": factor} for factor in candidates
            ]
        }

        sdfs = self.shape_descriptor_functions
        if sdfs is None or len(sdfs) != 2:
            raise ValueError("When using 'compound', " +
                             "shape_descriptor_functions must be a " +
                             "string array of length 2.")

        grid = GridSearchCV(
            estimator=ShapeDTW(
                n_neighbours=self.n_neighbors,
                subsequence_length=self.subsequence_length,
                shape_descriptor_function=self.shape_descriptor_function,
                shape_descriptor_functions=sdfs,
                metric_params=mp,
            ),
            param_grid=self._param_matrix,
            cv=KFold(n_splits=10, shuffle=True),
            scoring="accuracy",
        )
        grid.fit(X, y)
        self.weighting_factor = grid.best_params_["metric_params"][
            "weighting_factor"]

    def _preprocess(self, X):
        """Extract the subsequences of X and apply the shape descriptors.

        Used for both the training and the test data, so that both go
        through the same transformations.
        """
        X = self.sw.transform(X)

        # Feed X into the appropriate shape descriptor function
        return self._generate_shape_descriptors(X)

    def _predict_proba(self, X):
        """Perform predictions on the testing data X.

        This function returns the probabilities for each class.

        Parameters
        ----------
        X - pandas dataframe of testing data of shape [n_instances,1].

        Returns
        -------
        output : numpy array of shape =
                [n_instances, num_classes] of probabilities
        """
        # Transform the test data in the same way as the training data.
        X = self._preprocess(X)

        # Classify the test data
        return self.knn.predict_proba(X)

    def _predict(self, X):
        """Find predictions for all cases in X.

        Parameters
        ----------
        X : The testing input samples of shape [n_instances,1].

        Returns
        -------
        output : numpy array of shape = [n_instances]
        """
        # Transform the test data in the same way as the training data.
        X = self._preprocess(X)

        # Classify the test data
        return self.knn.predict(X)

    def _generate_shape_descriptors(self, data):
        """Generate shape descriptors.

        This function is used to convert a list of
        subsequences into a list of shape descriptors
        to be used for classification.

        Raises
        ------
        ValueError : if 'compound' is used and shape_descriptor_functions
                     does not hold exactly two entries.
        """
        # Get the appropriate transformer objects
        if self.shape_descriptor_function != "compound":
            self.transformer = [
                self._get_transformer(self.shape_descriptor_function)
            ]
        else:
            self.transformer = [
                self._get_transformer(x)
                for x in self.shape_descriptor_functions
            ]
            if len(self.transformer) != 2:
                raise ValueError("When using 'compound', " +
                                 "shape_descriptor_functions must be a " +
                                 "string array of length 2.")

        col_names = list(range(len(data.columns)))

        # Apply each transformer on the set of subsequences
        dataFrames = []
        for t in self.transformer:
            if t is None:
                # 'raw' descriptor: do no transformations
                dataFrames.append(data)
            else:
                # Do the transformation and extract the resulting data frame.
                t.fit(data)
                dataFrames.append(t.transform(data))

        # Combine the arrays into one dataframe
        if self.shape_descriptor_function == "compound":
            result = self._combine_data_frames(dataFrames,
                                               self.weighting_factor,
                                               col_names)
        else:
            result = dataFrames[0]
            result.columns = col_names

        return result

    def _get_transformer(self, tName):
        """Extract the appropriate transformer.

        Parameters
        ----------
        self   : the ShapeDTW object.
        tName  : the name of the required transformer.

        Returns
        -------
        output : Base Transformer object corresponding to the class
                 (or classes if its a compound transformer) of the
                 required transformer. The transformer is
                 configured with the parameters given in self.metric_params.
                 None is returned for 'raw'.

        throws : ValueError if a shape descriptor doesn't exist or if
                 self.metric_params holds an unknown parameter name.
        """
        parameters = self.metric_params

        tName = tName.lower()

        if parameters is None:
            parameters = {}

        parameters = {k.lower(): v for k, v in parameters.items()}

        self._check_metric_params(parameters)

        if tName == "raw":
            return None
        elif tName == "paa":
            num_intervals = parameters.get("num_intervals_paa")
            if num_intervals is None:
                return PAA()
            return PAA(num_intervals)
        elif tName == "dwt":
            num_levels = parameters.get("num_levels_dwt")
            if num_levels is None:
                return DWTTransformer()
            return DWTTransformer(num_levels)
        elif tName == "slope":
            num_intervals = parameters.get("num_intervals_slope")
            if num_intervals is None:
                return SlopeTransformer()
            return SlopeTransformer(num_intervals)
        elif tName == "derivative":
            return DerivativeSlopeTransformer()
        elif tName == "hog1d":
            # Pass on only the parameters that were given, so that the
            # transformer's own defaults apply to the rest.
            hog_params = {
                "num_intervals": parameters.get("num_intervals_hog1d"),
                "num_bins": parameters.get("num_bins_hog1d"),
                "scaling_factor": parameters.get("scaling_factor_hog1d"),
            }
            given = {k: v for k, v in hog_params.items() if v is not None}
            return HOG1DTransformer(**given)
        else:
            raise ValueError("Invalid shape desciptor function.")

    def _check_metric_params(self, parameters):
        """Check for an invalid metric_params.

        Raises a ValueError for the first key of ``parameters`` that is not
        a known metric parameter name.
        """
        valid_metric_params = [
            "num_intervals_paa",
            "num_levels_dwt",
            "num_intervals_slope",
            "num_intervals_hog1d",
            "num_bins_hog1d",
            "scaling_factor_hog1d",
            "weighting_factor",
        ]

        for x in parameters:
            if x not in valid_metric_params:
                raise ValueError(x + " is not a valid metric parameter. " +
                                 "Make sure the shape descriptor function" +
                                 " name is at the end of the metric " +
                                 "parameter name.")

    def _combine_data_frames(self, dataFrames, weighting_factor, col_names):
        """Combine two dataframes together into a single dataframe.

        Used when the shape_descriptor_function is set to "compound".
        For every column and row the values of the first descriptor are
        followed by the values of the second descriptor multiplied by
        ``weighting_factor``.
        """
        first_desc = dataFrames[0]
        second_desc = dataFrames[1]

        # Convert the dataframes into arrays, one per column
        first_desc_array = [
            from_nested_to_2d_array(first_desc[x], return_numpy=True)
            for x in first_desc.columns
        ]
        second_desc_array = [
            from_nested_to_2d_array(second_desc[x], return_numpy=True)
            for x in second_desc.columns
        ]

        # Concatenate the arrays together
        res = []
        for x in range(len(first_desc_array)):
            dim1 = []
            for y in range(len(first_desc_array[x])):
                dim2 = []
                dim2.extend(first_desc_array[x][y])
                dim2.extend(second_desc_array[x][y] * weighting_factor)
                dim1.append(dim2)
            res.append(dim1)

        res = np.asarray(res)

        # Convert to pandas dataframe
        df = pd.DataFrame()

        for col in col_names:
            colToAdd = []
            for row in range(len(res[col])):
                inst = res[col][row]
                colToAdd.append(pd.Series(inst))
            df[col] = colToAdd
        return df
示例#12
0
# -------------- SETUP ------------------------------------------

# Folder holding the datasets, located next to this file
DATA_PATH = os.path.join(os.path.dirname(__file__), "Datasets")

# Datasets to run, one entry per dataset
# [ [train set path, test set path, dataset name], ...]
datasets_path = [
    [
        "RacketSports/RacketSports_TRAIN.ts",
        "RacketSports/RacketSports_TEST.ts",
        "RacketSport",
    ],
]

# Classifiers to run, one entry per classifier
# [ [classifier, classifier name], ...]
# The first constructor argument (1, then 4) is the number that also appears
# in the display name.
classifiers = [
    [
        KNeighborsTimeSeriesClassifier(k, 'uniform', 'brute', 'dtw', None),
        "DTW-%dNN" % k,
    ]
    for k in (1, 4)
]

# --------------- MAIN PROGRAM ---------------------------------

# Load data
# [ ((train_data, train_class), (test_data, test_class), dataset name), ...]
data = []
for train_path, test_path, name in datasets_path:
    data += [
        (load_from_tsfile_to_dataframe(os.path.join(DATA_PATH, train_path)),
示例#13
0
def set_classifier(cls, resample_id=None, train_file=False):
    """Construct a classifier from its name.

    Basic way of creating the classifier to build using the default settings. This
    set up is to help with batch jobs for multiple problems to facilitate easy
    reproducibility for use with load_and_run_classification_experiment. You can pass a
    classifier object instead to run_classification_experiment.

    Parameters
    ----------
    cls : str
        String indicating which classifier you want. Matching is
        case-insensitive: the name is lower-cased before comparison.
    resample_id : int or None, default=None
        Classifier random seed. Passed as ``random_state`` to every classifier
        built here except the k-nearest-neighbour variants, ShapeDTW and
        MrSEQLClassifier, which are constructed without it.
    train_file : bool, default=False
        Whether a train file is being produced. Forwarded as
        ``save_train_predictions`` (TemporalDictionaryEnsemble) or
        ``save_transformed_data`` (DrCIF, Arsenal, ShapeletTransformClassifier);
        ignored by the other classifiers.

    Returns
    -------
    classifier : A BaseClassifier.
        The classifier matching the input classifier name.

    Raises
    ------
    ValueError
        If ``cls`` does not match any known classifier name.
    """
    name = cls.lower()
    # Dictionary based
    if name in ("boss", "bossensemble"):
        return BOSSEnsemble(random_state=resample_id)
    elif name in ("cboss", "contractableboss"):
        return ContractableBOSS(random_state=resample_id)
    elif name in ("tde", "temporaldictionaryensemble"):
        return TemporalDictionaryEnsemble(
            random_state=resample_id, save_train_predictions=train_file
        )
    elif name == "weasel":
        return WEASEL(random_state=resample_id)
    elif name == "muse":
        return MUSE(random_state=resample_id)
    # Distance based
    elif name in ("pf", "proximityforest"):
        return ProximityForest(random_state=resample_id)
    elif name in ("pt", "proximitytree"):
        return ProximityTree(random_state=resample_id)
    # The alias must be all lower case: ``name`` has already been lower-cased,
    # so a mixed-case literal such as "proximityStump" could never match.
    elif name in ("ps", "proximitystump"):
        return ProximityStump(random_state=resample_id)
    elif name in ("dtwcv", "kneighborstimeseriesclassifier"):
        return KNeighborsTimeSeriesClassifier(distance="dtwcv")
    elif name in ("dtw", "1nn-dtw"):
        return KNeighborsTimeSeriesClassifier(distance="dtw")
    elif name in ("msm", "1nn-msm"):
        return KNeighborsTimeSeriesClassifier(distance="msm")
    elif name in ("ee", "elasticensemble"):
        return ElasticEnsemble(random_state=resample_id)
    elif name == "shapedtw":
        return ShapeDTW()
    # Feature based
    elif name == "catch22":
        return Catch22Classifier(
            random_state=resample_id,
            estimator=RandomForestClassifier(n_estimators=500),
        )
    elif name == "matrixprofile":
        return MatrixProfileClassifier(random_state=resample_id)
    elif name == "signature":
        return SignatureClassifier(
            random_state=resample_id,
            estimator=RandomForestClassifier(n_estimators=500),
        )
    elif name == "tsfresh":
        return TSFreshClassifier(
            random_state=resample_id,
            estimator=RandomForestClassifier(n_estimators=500),
        )
    elif name == "tsfresh-r":
        return TSFreshClassifier(
            random_state=resample_id,
            estimator=RandomForestClassifier(n_estimators=500),
            relevant_feature_extractor=True,
        )
    # Hybrid
    elif name in ("hc1", "hivecotev1"):
        return HIVECOTEV1(random_state=resample_id)
    elif name in ("hc2", "hivecotev2"):
        return HIVECOTEV2(random_state=resample_id)
    # Interval based
    # "randomintervalspectralforest" is kept for existing callers; the
    # lower-cased class name is accepted as well, like the other aliases.
    elif name in (
        "rise",
        "randomintervalspectralforest",
        "randomintervalspectralensemble",
    ):
        return RandomIntervalSpectralEnsemble(
            random_state=resample_id, n_estimators=500
        )
    elif name in ("tsf", "timeseriesforestclassifier"):
        return TimeSeriesForestClassifier(random_state=resample_id, n_estimators=500)
    elif name in ("cif", "canonicalintervalforest"):
        return CanonicalIntervalForest(random_state=resample_id, n_estimators=500)
    elif name in ("stsf", "supervisedtimeseriesforest"):
        return SupervisedTimeSeriesForest(random_state=resample_id, n_estimators=500)
    elif name == "drcif":
        return DrCIF(
            random_state=resample_id, n_estimators=500, save_transformed_data=train_file
        )
    # Kernel based
    elif name == "rocket":
        return ROCKETClassifier(random_state=resample_id)
    elif name == "arsenal":
        return Arsenal(random_state=resample_id, save_transformed_data=train_file)
    # Shapelet based
    elif name in ("stc", "shapelettransformclassifier"):
        return ShapeletTransformClassifier(
            random_state=resample_id, save_transformed_data=train_file
        )
    elif name in ("mrseql", "mrseqlclassifier"):
        return MrSEQLClassifier(seql_mode="fs", symrep=["sax", "sfa"])
    else:
        # ValueError subclasses Exception, so callers that catch the
        # previously raised bare Exception keep working.
        raise ValueError(f"UNKNOWN CLASSIFIER: {cls}")
示例#14
0
from sklearn.metrics.cluster import contingency_matrix
import numpy as np
import os
import sktime
import time


# -------------- SETUP -----------------

# Datasets path folder
DATA_PATH = os.path.join(os.path.dirname(__file__), "Datasets")

# Classifiers
# [ [classifier, classifier name], ...]
# NOTE(review): the positional order is assumed to be
# (n_neighbors, weights, algorithm, distance), so 'dtw' must be the fourth
# argument; passing it third would set the search algorithm instead of the
# distance. Confirm against the installed sktime version.
classifiers = [
    [KNeighborsTimeSeriesClassifier(1, 'uniform', 'brute', 'dtw'), "DTW-1NN"],
    [KNeighborsTimeSeriesClassifier(4, 'uniform', 'brute', 'dtw'), "DTW-4NN"]
]

# Number of cores to use (-1 -> all)
nb_jobs = -1

# Split strategy
nb_split = 10
cv = KFold(n_splits=nb_split)

# --------------- MAIN PROGRAM --------------------

# Display some info
# Number of entries found in the datasets folder
nbDatasets = len(os.listdir(DATA_PATH))
# Run the fit and predict
for i, dataset in enumerate(datasets):
    print(f'Dataset: {i + 1}/{n_datasets} {dataset}')

    # pre-allocate results
    results = np.zeros(3)

    # load data
    train_file = os.path.join(data_path, f'{dataset}/{dataset}_TRAIN.ts')
    test_file = os.path.join(data_path, f'{dataset}/{dataset}_TEST.ts')

    x_train, y_train = load_from_tsfile_to_dataframe(train_file)
    x_test, y_test = load_from_tsfile_to_dataframe(test_file)

    tsf = KNeighborsTimeSeriesClassifier()

    # fit
    try:
        s = time.time()
        tsf.fit(x_train, y_train)
        results[0] = time.time() - s

        # predict
        s = time.time()
        y_pred = tsf.predict(x_test)
        results[1] = time.time() - s

    # catch and raise user exceptions
    except (KeyboardInterrupt, SystemExit):
        raise