def test_col_ens_on_unit_test_data():
    """Check ColumnEnsembleClassifier probabilities on the unit test data.

    A single FreshPRINCE estimator is fitted on column 0 of the training
    split. The class probabilities predicted for ten sampled test cases are
    then compared with the stored ``col_ens_unit_test_probas`` array to two
    decimal places.
    """
    X_train, y_train = load_unit_test(split="train")
    X_test, _ = load_unit_test(split="test")

    # Seeded generator so the same ten cases are scored on every run.
    # NOTE(review): the indices are drawn from the length of the training
    # split but applied to the test split; presumably the stored reference
    # values were produced this way - confirm before changing.
    rng = np.random.RandomState(0)
    sample_idx = rng.choice(len(y_train), 10, replace=False)

    # Column ensemble made of one estimator bound to column 0.
    fresh_prince = FreshPRINCE(
        random_state=0,
        default_fc_parameters="minimal",
        n_estimators=10,
    )
    ensemble = ColumnEnsembleClassifier(
        estimators=[("FreshPrince", fresh_prince, [0])]
    )
    ensemble.fit(X_train, y_train)

    # Only the probabilities are checked; hard predictions are not asserted.
    predicted = ensemble.predict_proba(X_test.iloc[sample_idx])
    testing.assert_array_almost_equal(
        predicted, col_ens_unit_test_probas, decimal=2
    )
def test_col_ens_on_basic_motions():
    """Check ColumnEnsembleClassifier probabilities on basic motions data.

    Three estimators are combined, each bound to its own set of columns:
    FreshPRINCE on columns 0-2, TemporalDictionaryEnsemble on columns 3-4
    and DrCIF on column 5. The class probabilities predicted for ten sampled
    test cases are compared with the stored ``col_ens_basic_motions_probas``
    array to two decimal places.
    """
    X_train, y_train = load_basic_motions(split="train")
    X_test, _ = load_basic_motions(split="test")

    # Seeded generator so the same ten cases are scored on every run.
    # NOTE(review): the indices are drawn from the length of the training
    # split but applied to the test split; presumably the stored reference
    # values were produced this way - confirm before changing.
    rng = np.random.RandomState(4)
    sample_idx = rng.choice(len(y_train), 10, replace=False)

    fresh_prince = FreshPRINCE(
        random_state=0,
        default_fc_parameters="minimal",
        n_estimators=10,
    )
    dictionary_ensemble = TemporalDictionaryEnsemble(
        n_parameter_samples=10,
        max_ensemble_size=5,
        randomly_selected_params=5,
        random_state=0,
    )
    interval_forest = DrCIF(
        n_estimators=10, random_state=0, save_transformed_data=True
    )

    # Each tuple is (name, estimator, columns the estimator is given).
    ensemble = ColumnEnsembleClassifier(
        estimators=[
            ("FreshPrince", fresh_prince, [0, 1, 2]),
            ("TDE", dictionary_ensemble, [3, 4]),
            ("DrCIF", interval_forest, [5]),
        ]
    )
    ensemble.fit(X_train, y_train)

    # Only the probabilities are checked; hard predictions are not asserted.
    predicted = ensemble.predict_proba(X_test.iloc[sample_idx])
    testing.assert_array_almost_equal(
        predicted, col_ens_basic_motions_probas, decimal=2
    )
def test_col_ens_on_basic_motions():
    """Check ColumnEnsembleClassifier probabilities on basic motions data.

    A TemporalDictionaryEnsemble on columns 3-4 and a DrCIF on column 5 are
    combined into one column ensemble. The class probabilities predicted for
    ten sampled test cases are compared with the stored
    ``col_ens_basic_motions_probas`` array to two decimal places.
    """
    X_train, y_train = load_basic_motions(split="train")
    X_test, _ = load_basic_motions(split="test")

    # Seeded generator so the same ten cases are scored on every run.
    # NOTE(review): the indices are drawn from the length of the training
    # split but applied to the test split; presumably the stored reference
    # values were produced this way - confirm before changing.
    rng = np.random.RandomState(4)
    sample_idx = rng.choice(len(y_train), 10, replace=False)

    dictionary_ensemble = TemporalDictionaryEnsemble(
        n_parameter_samples=10,
        max_ensemble_size=5,
        randomly_selected_params=5,
        random_state=0,
    )
    interval_forest = DrCIF(n_estimators=10, random_state=0)

    # Each tuple is (name, estimator, columns the estimator is given).
    ensemble = ColumnEnsembleClassifier(
        estimators=[
            ("TDE", dictionary_ensemble, [3, 4]),
            ("DrCIF", interval_forest, [5]),
        ]
    )
    ensemble.fit(X_train, y_train)

    predicted = ensemble.predict_proba(X_test.iloc[sample_idx])
    testing.assert_array_almost_equal(
        predicted, col_ens_basic_motions_probas, decimal=2
    )
def test_col_ens_on_unit_test_data():
    """Check ColumnEnsembleClassifier probabilities on the unit test data.

    A single DrCIF estimator is fitted on column 0 of the training split.
    The class probabilities predicted for ten sampled test cases are then
    compared with the stored ``col_ens_unit_test_probas`` array to two
    decimal places.
    """
    X_train, y_train = load_unit_test(split="train")
    X_test, _ = load_unit_test(split="test")

    # Seeded generator so the same ten cases are scored on every run.
    # NOTE(review): the indices are drawn from the length of the training
    # split but applied to the test split; presumably the stored reference
    # values were produced this way - confirm before changing.
    rng = np.random.RandomState(0)
    sample_idx = rng.choice(len(y_train), 10, replace=False)

    # Column ensemble made of one estimator bound to column 0.
    interval_forest = DrCIF(n_estimators=10, random_state=0)
    ensemble = ColumnEnsembleClassifier(
        estimators=[("DrCIF", interval_forest, [0])]
    )
    ensemble.fit(X_train, y_train)

    predicted = ensemble.predict_proba(X_test.iloc[sample_idx])
    testing.assert_array_almost_equal(
        predicted, col_ens_unit_test_probas, decimal=2
    )
def fit(self, luck_average_windows, assessment_windows, until=None, max_horizon=9 * 6):
    """Fit one column-ensemble classifier per assessment window.

    For every assessment window that does not exceed ``max_horizon`` a
    time-series file is prepared via ``self.prepare_ts_file``, loaded back
    as a data frame, and used to train a ``ColumnEnsembleClassifier`` that
    holds one ``TimeSeriesForestClassifier`` per entry of
    ``luck_average_windows`` (estimator ``i`` is bound to column ``i``).
    Each fitted ensemble is appended to ``self.classifiers``.

    Parameters
    ----------
    luck_average_windows : sized collection
        Only its length is used here: it fixes how many per-column forests
        are built.
    assessment_windows : iterable
        Window sizes to fit for. Iteration stops at the first window that is
        larger than ``max_horizon``.
    until : int or None
        End index handed to ``prepare_ts_file``. ``None`` means
        ``len(self.data_points)``. A value below 0 or not below
        ``len(self.data_points)`` is logged as an error and nothing is
        fitted.
    max_horizon : int
        Stored on ``self.horizon`` and used as the upper bound for windows.

    Returns
    -------
    None
    """
    fit_log = logger("MODEL-FIT")
    summary_template = (
        "max_horizon: {} / avg windows: {} / assmnt windows: {} / until: {} / total_data_size: {}"
    )
    fit_log.debug(
        summary_template.format(
            max_horizon,
            str(luck_average_windows),
            str(assessment_windows),
            until,
            len(self.data_points),
        )
    )

    # Reject an explicit end index that lies outside the available data.
    if until is not None:
        if until < 0 or until >= len(self.data_points):
            logger("MODEL-FIT").error(
                "Parameter until is too large for the given data points: {}".format(until)
            )
            return

    self.horizon = max_horizon

    for window_index, window in enumerate(assessment_windows):
        # Stop at the first window beyond the horizon; later ones are not
        # examined at all.
        if window > self.horizon:
            break

        # Write the training data to a temporary file in the format the
        # sktime loader reads.
        end_index = len(self.data_points) if until is None else until
        temporary_data_fit_file = self.prepare_ts_file(
            0, end_index, self.case_observation_size, window_index, window
        )

        # Read the file back as data frames, with missing values replaced by
        # the string "-100".
        X, y = load_from_tsfile_to_dataframe(
            temporary_data_fit_file, replace_missing_vals_with="-100"
        )

        # The class-weight helper is told which position the "true" label
        # takes, decided by whether the first label seen is "false".
        true_index = 1 if y[0] == "false" else 0
        new_class_weights = self.create_class_weight_dict(true_index=true_index)

        # One forest per averaging window, each restricted to its own column.
        estimators = [
            (
                "TSF{}".format(column),
                TimeSeriesForestClassifier(
                    n_estimators=int(self.no_estimators),
                    n_jobs=16,
                    max_depth=self.max_depth,
                    class_weight=new_class_weights,
                    criterion=self.criterion,
                    min_samples_split=self.min_samples_split,
                    min_samples_leaf=self.min_samples_leaf,
                    oob_score=self.oob_score,
                    bootstrap=self.bootstrap,
                ),
                [column],
            )
            for column in range(len(luck_average_windows))
        ]

        ensemble = ColumnEnsembleClassifier(estimators=estimators)
        ensemble.fit(X, y)
        self.classifiers.append(ensemble)
def columnEnsembleMethod(classifier_list, X, y, percent_train, clf_parameters=None):
    """Train a column ensemble and return its score on the held-out split.

    The data is split with ``splitTestTrain``. One classifier is built per
    entry of ``classifier_list`` and bound to that entry's column, the
    resulting ``ColumnEnsembleClassifier`` is fitted on the training part,
    and the wall-clock fitting time is printed.

    Parameters
    ----------
    classifier_list : iterable of mapping
        Each entry supplies ``'classifier'`` (a string passed to
        ``classifierBuilder`` and used as the estimator name prefix) and
        ``'columnNum'`` (the column the estimator is given).
    X, y
        Data and labels forwarded to ``splitTestTrain``.
    percent_train
        Forwarded to ``splitTestTrain`` as the training share.
    clf_parameters : optional
        Currently unused: every classifier is built with an empty parameter
        list. The default is ``None`` rather than a shared mutable list.
        NOTE(review): presumably meant to carry per-classifier parameters -
        confirm the intended wiring before using it.

    Returns
    -------
    The value of ``clf.score`` on the test part of the split.
    """
    # Tuples in the (name, estimator, columns) layout the ensemble expects.
    estimator_list = []
    Xtrain, Xtest, ytrain, ytest = splitTestTrain(X, y, percent_train)

    for spec in classifier_list:
        # A fresh empty parameter list for every classifier, as before.
        built_clf = classifierBuilder(spec['classifier'], [])
        column = spec['columnNum']
        estimator_list.append((spec['classifier'] + str(column), built_clf, [column]))

    clf = ColumnEnsembleClassifier(estimators=estimator_list)

    # Time only the fit call.
    start_time = time.time()
    clf.fit(Xtrain, ytrain)
    elapsed = time.time() - start_time
    print(f'Total Time : {round(elapsed, 2)} seconds\n\n')

    return clf.score(Xtest, ytest)
# data -> our function -> (X_nested, y) X = generate_long_table(ts) X.head() X_nested = from_long_to_nested(X) X_nested.head() y = np.array(['a']) # , 'b', 'a', 'b', 'a', 'b', 'a', 'b']) print(X_nested) X_train, X_test, y_train, y_test = train_test_split(X_nested, y) print(X.head()) classifier = ColumnEnsembleClassifier(estimators=[ ("TSF1", TimeSeriesForestClassifier(n_estimators=100), [1]), ("TSF2", TimeSeriesForestClassifier(n_estimators=100), [2]), ]) classifier.fit(X_train, y_train) # Use the test portion of data for prediction so we can understand how accurate our model was learned y_pred = classifier.predict(X_test) # Use the native `accuracy_score` method to calculate the accuracy based on the test outcomes and the predicted outcomes print("Accuracy score is: " + str(accuracy_score(y_test, y_pred))) def generate_example_long_table(num_cases=50, series_len=20, num_dims=2): rows_per_case = series_len * num_dims total_rows = num_cases * series_len * num_dims case_ids = np.empty(total_rows, dtype=np.int) idxs = np.empty(total_rows, dtype=np.int)
# Turn the train/test inputs into frames.
# NOTE(review): .to_frame() implies both inputs are pandas Series with a
# 'combine' column available afterwards - confirm against the code above.
X_train_timedata = X_train_timedata.to_frame()
X_test_timedata = X_test_timedata.to_frame()

# Rebuild each 'combine' column as a stand-alone Series on the original
# index, then wrap it in a one-column frame.
ts_train = pd.Series(X_train_timedata['combine'].values, index=X_train_timedata.index)
ts_test = pd.Series(X_test_timedata['combine'].values, index=X_test_timedata.index)
X_ts_train = ts_train.to_frame()
X_ts_test = ts_test.to_frame()

# Replace every cell of the single column with a pandas Series built from
# the cell's content; train frame first, then test frame.
for frame in (X_ts_train, X_ts_test):
    for row in range(frame.shape[0]):
        frame.iat[row, 0] = pd.Series(frame.iat[row, 0])

## =======================Column ensembling================================
# One small forest (5 trees) bound to column 0.
tsf = TimeSeriesForestClassifier(n_estimators=5)
clf = ColumnEnsembleClassifier(estimators=[("TSF0", tsf, [0])])

# "Efficiency" is the wall-clock duration of the fit call in seconds.
start_time = time.time()
clf.fit(X_ts_train, y_train)
Efficiency = time.time() - start_time

Accuracy = clf.score(X_ts_test, y_test)

print("Efficiency is:\n", Efficiency)
print("Accuracy is :\n", Accuracy)
verbose=True), "full_features": make_pipeline( TruncationTransformer(lower=MAX_LENGTH), ColumnEnsembleClassifier([ ("features_0", make_pipeline(TSFreshFeatureExtractor( default_fc_parameters="efficient", show_warnings=False, n_jobs=-1), RandomForestClassifier(n_jobs=-1, random_state=1), verbose=True), [0]), ("features_1", make_pipeline(TSFreshFeatureExtractor( default_fc_parameters="efficient", show_warnings=False, n_jobs=-1), RandomForestClassifier(n_jobs=-1, random_state=1), verbose=True), [1]), ("features_2", make_pipeline(TSFreshFeatureExtractor( default_fc_parameters="efficient", show_warnings=False, n_jobs=-1), RandomForestClassifier(n_jobs=-1, random_state=1), verbose=True), [2]), ], verbose=True), verbose=True), "full_interval": make_pipeline(TruncationTransformer(lower=MAX_LENGTH), ColumnEnsembleClassifier([
if __name__ == "__main__":
    # Script entry point: print reproduced results for two classifiers.

    # Column ensemble: ContractableBOSS on column 5 and a
    # CanonicalIntervalForest on columns 3 and 4, both seeded.
    boss = ContractableBOSS(
        n_parameter_samples=4, max_ensemble_size=2, random_state=0
    )
    interval_forest = CanonicalIntervalForest(
        n_estimators=2,
        n_intervals=4,
        att_subsample_size=4,
        random_state=0,
    )
    column_ensemble = ColumnEnsembleClassifier(
        estimators=[
            ("cBOSS", boss, [5]),
            ("CIF", interval_forest, [3, 4]),
        ]
    )
    _print_array(
        "ColumnEnsembleClassifier - BasicMotions",
        _reproduce_classification_basic_motions(column_ensemble),
    )

    # Seeded BOSS ensemble, run through the unit test reproduction helper.
    boss_ensemble = BOSSEnsemble(max_ensemble_size=5, random_state=0)
    _print_array(
        "BOSSEnsemble - UnitTest",
        _reproduce_classification_unit_test(boss_ensemble),
    )