def test_col_ens_on_unit_test_data():
    """Check ColumnEnsembleClassifier probabilities on the unit test data.

    A one-member ensemble (FreshPRINCE on column 0) is fitted on the train
    split. The class probabilities it predicts for ten sampled test cases
    must match the stored reference values to two decimal places.
    """
    train_X, train_y = load_unit_test(split="train")
    test_X, _ = load_unit_test(split="test")

    # Fixed seed so the same ten cases are scored on every run. The positions
    # are drawn from the size of the train split and applied to the test split.
    sampled_rows = np.random.RandomState(0).choice(
        len(train_y), 10, replace=False
    )

    fresh_prince = FreshPRINCE(
        random_state=0,
        default_fc_parameters="minimal",
        n_estimators=10,
    )
    ensemble = ColumnEnsembleClassifier(
        estimators=[("FreshPrince", fresh_prince, [0])]
    )
    ensemble.fit(train_X, train_y)

    predicted = ensemble.predict_proba(test_X.iloc[sampled_rows])
    testing.assert_array_almost_equal(
        predicted, col_ens_unit_test_probas, decimal=2
    )
def test_col_ens_on_basic_motions():
    """Check ColumnEnsembleClassifier probabilities on basic motions data.

    Three classifiers are combined, each restricted to its own columns:
    FreshPRINCE on 0-2, TemporalDictionaryEnsemble on 3-4 and DrCIF on 5.
    The probabilities predicted for ten sampled test cases must match the
    stored reference values to two decimal places.
    """
    train_X, train_y = load_basic_motions(split="train")
    test_X, _ = load_basic_motions(split="test")

    # Fixed seed so the same ten cases are scored on every run. The positions
    # are drawn from the size of the train split and applied to the test split.
    sampled_rows = np.random.RandomState(4).choice(
        len(train_y), 10, replace=False
    )

    fresh_prince = FreshPRINCE(
        random_state=0,
        default_fc_parameters="minimal",
        n_estimators=10,
    )
    dictionary_ensemble = TemporalDictionaryEnsemble(
        n_parameter_samples=10,
        max_ensemble_size=5,
        randomly_selected_params=5,
        random_state=0,
    )
    interval_forest = DrCIF(
        n_estimators=10,
        random_state=0,
        save_transformed_data=True,
    )

    ensemble = ColumnEnsembleClassifier(
        estimators=[
            ("FreshPrince", fresh_prince, [0, 1, 2]),
            ("TDE", dictionary_ensemble, [3, 4]),
            ("DrCIF", interval_forest, [5]),
        ]
    )
    ensemble.fit(train_X, train_y)

    predicted = ensemble.predict_proba(test_X.iloc[sampled_rows])
    testing.assert_array_almost_equal(
        predicted, col_ens_basic_motions_probas, decimal=2
    )
示例#3
0
def test_col_ens_on_basic_motions():
    """Check ColumnEnsembleClassifier probabilities on basic motions data.

    TemporalDictionaryEnsemble is applied to columns 3-4 and DrCIF to
    column 5. The probabilities predicted for ten sampled test cases must
    match the stored reference values to two decimal places.
    """
    train_X, train_y = load_basic_motions(split="train")
    test_X, _ = load_basic_motions(split="test")

    # Fixed seed so the same ten cases are scored on every run. The positions
    # are drawn from the size of the train split and applied to the test split.
    sampled_rows = np.random.RandomState(4).choice(
        len(train_y), 10, replace=False
    )

    dictionary_ensemble = TemporalDictionaryEnsemble(
        n_parameter_samples=10,
        max_ensemble_size=5,
        randomly_selected_params=5,
        random_state=0,
    )
    interval_forest = DrCIF(n_estimators=10, random_state=0)

    ensemble = ColumnEnsembleClassifier(
        estimators=[
            ("TDE", dictionary_ensemble, [3, 4]),
            ("DrCIF", interval_forest, [5]),
        ]
    )
    ensemble.fit(train_X, train_y)

    predicted = ensemble.predict_proba(test_X.iloc[sampled_rows])
    testing.assert_array_almost_equal(
        predicted, col_ens_basic_motions_probas, decimal=2
    )
示例#4
0
def test_col_ens_on_unit_test_data():
    """Check ColumnEnsembleClassifier probabilities on the unit test data.

    A one-member ensemble (DrCIF on column 0) is fitted on the train split.
    The class probabilities it predicts for ten sampled test cases must
    match the stored reference values to two decimal places.
    """
    train_X, train_y = load_unit_test(split="train")
    test_X, _ = load_unit_test(split="test")

    # Fixed seed so the same ten cases are scored on every run. The positions
    # are drawn from the size of the train split and applied to the test split.
    sampled_rows = np.random.RandomState(0).choice(
        len(train_y), 10, replace=False
    )

    interval_forest = DrCIF(n_estimators=10, random_state=0)
    ensemble = ColumnEnsembleClassifier(
        estimators=[("DrCIF", interval_forest, [0])]
    )
    ensemble.fit(train_X, train_y)

    predicted = ensemble.predict_proba(test_X.iloc[sampled_rows])
    testing.assert_array_almost_equal(
        predicted, col_ens_unit_test_probas, decimal=2
    )
    def fit(self, luck_average_windows, assessment_windows, until=None, max_horizon=9 * 6):
        """Fit one ColumnEnsembleClassifier per assessment window.

        For every assessment window not exceeding ``max_horizon`` a temporary
        .ts file is written via ``prepare_ts_file``, loaded back as a data
        frame, and used to train an ensemble holding one
        TimeSeriesForestClassifier per entry of ``luck_average_windows``
        (estimator ``i`` is bound to column ``i``). Each fitted ensemble is
        appended to ``self.classifiers``.

        Parameters
        ----------
        luck_average_windows : sequence
            Only its length is used here: it sets the number of per-column
            forests in each ensemble.
        assessment_windows : iterable
            Window values, processed in order. Iteration stops at the first
            value greater than ``max_horizon``.
        until : int or None
            End position passed to ``prepare_ts_file``; ``None`` means
            ``len(self.data_points)``. A value that is negative or not
            smaller than ``len(self.data_points)`` is logged as an error and
            nothing is fitted.
        max_horizon : int
            Stored as ``self.horizon`` and used as the window cut-off.
        """
        logger("MODEL-FIT").debug(
            "max_horizon: {} / avg windows: {} / assmnt windows: {} / until: {} / total_data_size: {}".format(
                max_horizon,
                str(luck_average_windows),
                str(assessment_windows),
                until,
                len(self.data_points)))

        # Reject an explicit end position that lies outside the data.
        if until is not None:
            if until < 0 or until >= len(self.data_points):
                logger("MODEL-FIT").error(
                    "Parameter until is too large for the given data points: {}".format(until))
                return

        self.horizon = max_horizon
        for window_index, window in enumerate(assessment_windows):
            if window > self.horizon:
                break

            # Write the training slice to a temporary file for sktime.
            end_position = until if until is not None else len(self.data_points)
            fit_file = self.prepare_ts_file(
                0, end_position, self.case_observation_size, window_index, window)

            # Load the data frames back from the temporary fit file.
            X, y = load_from_tsfile_to_dataframe(fit_file, replace_missing_vals_with="-100")

            # The class weights depend on which label appears first.
            true_index = 1 if y[0] == "false" else 0
            class_weights = self.create_class_weight_dict(true_index=true_index)

            # One forest per averaging window, each restricted to its own column.
            estimators = [
                (
                    "TSF{}".format(column),
                    TimeSeriesForestClassifier(
                        n_estimators=int(self.no_estimators),
                        n_jobs=16,
                        max_depth=self.max_depth,
                        class_weight=class_weights,
                        criterion=self.criterion,
                        min_samples_split=self.min_samples_split,
                        min_samples_leaf=self.min_samples_leaf,
                        oob_score=self.oob_score,
                        bootstrap=self.bootstrap,
                    ),
                    [column],
                )
                for column in range(len(luck_average_windows))
            ]

            ensemble = ColumnEnsembleClassifier(estimators=estimators)
            ensemble.fit(X, y)
            self.classifiers.append(ensemble)
示例#6
0
def columnEnsembleMethod(classifier_list,
                         X,
                         y,
                         percent_train,
                         clf_parameters=None):
    """Train a ColumnEnsembleClassifier and return its test-split score.

    The data is split with ``splitTestTrain``, one classifier is built per
    entry of ``classifier_list`` and bound to that entry's column, the
    ensemble is fitted on the train split, and the fit duration is printed.

    Parameters
    ----------
    classifier_list : iterable of mapping
        Each entry provides ``'classifier'`` (a string handed to
        ``classifierBuilder`` and used as the estimator-name prefix) and
        ``'columnNum'`` (the column the classifier is applied to).
    X, y
        Features and labels, forwarded to ``splitTestTrain``.
    percent_train
        Forwarded to ``splitTestTrain`` to control the split.
    clf_parameters : optional
        Accepted for backward compatibility but not read: every classifier
        is built with an empty parameter list.
        NOTE(review): confirm whether per-classifier parameters were meant
        to be forwarded to ``classifierBuilder``.

    Returns
    -------
    The value of ``clf.score`` on the test split.
    """
    Xtrain, Xtest, ytrain, ytest = splitTestTrain(X, y, percent_train)

    # Build the (name, estimator, columns) tuples the ensemble expects.
    estimator_list = []
    for spec in classifier_list:
        built_clf = classifierBuilder(spec['classifier'], [])
        column = spec['columnNum']
        name = spec['classifier'] + str(column)
        estimator_list.append((name, built_clf, [column]))

    clf = ColumnEnsembleClassifier(estimators=estimator_list)

    # perf_counter is monotonic, so the measured duration cannot be skewed
    # by system clock adjustments while fitting.
    start_time = time.perf_counter()
    clf.fit(Xtrain, ytrain)
    elapsed = time.perf_counter() - start_time
    print('Total Time : ' + str(round(elapsed, 2)) + ' seconds\n\n')

    return clf.score(Xtest, ytest)
示例#7
0
# Pipeline: raw data -> generate_long_table -> from_long_to_nested -> (X_nested, y)

# Build the long-format table from `ts`.
X = generate_long_table(ts)
X.head()  # result is discarded when run as a script

# Convert the long table to the nested layout passed to the classifier.
X_nested = from_long_to_nested(X)
X_nested.head()  # result is discarded when run as a script
# NOTE(review): y holds a single label; the longer alternating 'a'/'b' label
# list is disabled. Confirm it matches the number of cases in X_nested.
y = np.array(['a'])  # remaining labels ('b', 'a', 'b', ...) are disabled

print(X_nested)

# Split the cases and labels into train and test portions.
X_train, X_test, y_train, y_test = train_test_split(X_nested, y)
print(X.head())
# One time series forest per column: TSF1 on column 1, TSF2 on column 2.
classifier = ColumnEnsembleClassifier(estimators=[
    ("TSF1", TimeSeriesForestClassifier(n_estimators=100), [1]),
    ("TSF2", TimeSeriesForestClassifier(n_estimators=100), [2]),
])
classifier.fit(X_train, y_train)

# Predict on the held-out test portion.
y_pred = classifier.predict(X_test)
# Compare the predictions with the held-out labels using accuracy_score.
print("Accuracy score is: " + str(accuracy_score(y_test, y_pred)))


def generate_example_long_table(num_cases=50, series_len=20, num_dims=2):
    rows_per_case = series_len * num_dims
    total_rows = num_cases * series_len * num_dims

    case_ids = np.empty(total_rows, dtype=np.int)
    idxs = np.empty(total_rows, dtype=np.int)
示例#8
0
# Convert the train/test time data to DataFrames.
X_train_timedata = X_train_timedata.to_frame()
X_test_timedata = X_test_timedata.to_frame()

# Re-wrap the 'combine' column as a Series on the same index, then turn it
# back into a single-column DataFrame.
ts_train = pd.Series(X_train_timedata['combine'].values,
                     index=X_train_timedata.index)
X_ts_train = ts_train.to_frame()

ts_test = pd.Series(X_test_timedata['combine'].values,
                    index=X_test_timedata.index)
X_ts_test = ts_test.to_frame()

# Replace every cell of the single column with a pd.Series built from its
# current value (presumably to obtain sktime's nested layout - TODO confirm).
for row_num in range(0, X_ts_train.shape[0]):
    series1 = pd.Series(X_ts_train.iat[row_num, 0])
    X_ts_train.iat[row_num, 0] = series1

# Same cell-wise wrapping for the test frame.
for row_num in range(0, X_ts_test.shape[0]):
    series2 = pd.Series(X_ts_test.iat[row_num, 0])
    X_ts_test.iat[row_num, 0] = series2

## =======================Column ensembling================================
# Single-member ensemble: a 5-tree time series forest on column 0.
clf = ColumnEnsembleClassifier(estimators=[
    ("TSF0", TimeSeriesForestClassifier(n_estimators=5), [0]),
])

# `Efficiency` is the elapsed time of the fit call as measured by time.time();
# `Accuracy` is the ensemble's score on the test frame.
start_time = time.time()
clf.fit(X_ts_train, y_train)
Efficiency = time.time() - start_time
Accuracy = clf.score(X_ts_test, y_test)
print("Efficiency is:\n", Efficiency)
print("Accuracy is :\n", Accuracy)
示例#9
0
               verbose=True),
 "full_features":
 make_pipeline(
     TruncationTransformer(lower=MAX_LENGTH),
     ColumnEnsembleClassifier([
         ("features_0",
          make_pipeline(TSFreshFeatureExtractor(
              default_fc_parameters="efficient",
              show_warnings=False,
              n_jobs=-1),
                        RandomForestClassifier(n_jobs=-1, random_state=1),
                        verbose=True), [0]),
         ("features_1",
          make_pipeline(TSFreshFeatureExtractor(
              default_fc_parameters="efficient",
              show_warnings=False,
              n_jobs=-1),
                        RandomForestClassifier(n_jobs=-1, random_state=1),
                        verbose=True), [1]),
         ("features_2",
          make_pipeline(TSFreshFeatureExtractor(
              default_fc_parameters="efficient",
              show_warnings=False,
              n_jobs=-1),
                        RandomForestClassifier(n_jobs=-1, random_state=1),
                        verbose=True), [2]),
     ],
                              verbose=True),
     verbose=True),
 "full_interval":
 make_pipeline(TruncationTransformer(lower=MAX_LENGTH),
               ColumnEnsembleClassifier([
示例#10
0

if __name__ == "__main__":
    # ColumnEnsembleClassifier on BasicMotions: cBOSS on column 5 and
    # CIF on columns 3-4, built in the same order as they are listed.
    cboss = ContractableBOSS(
        n_parameter_samples=4,
        max_ensemble_size=2,
        random_state=0,
    )
    cif = CanonicalIntervalForest(
        n_estimators=2,
        n_intervals=4,
        att_subsample_size=4,
        random_state=0,
    )
    column_ensemble = ColumnEnsembleClassifier(
        estimators=[
            ("cBOSS", cboss, [5]),
            ("CIF", cif, [3, 4]),
        ]
    )
    _print_array(
        "ColumnEnsembleClassifier - BasicMotions",
        _reproduce_classification_basic_motions(column_ensemble),
    )

    # BOSSEnsemble on the unit test data.
    boss_ensemble = BOSSEnsemble(max_ensemble_size=5, random_state=0)
    _print_array(
        "BOSSEnsemble - UnitTest",
        _reproduce_classification_unit_test(boss_ensemble),
    )