# create the label vector and the corresponding semantic labels
y = np.array([0, 1, 2, 3, 4, 5, 6, 7])
labels = ['LB_BC', 'RB_BC', 'LF_BC', 'RF_BC',
          'LB_BP', 'RB_BP', 'LF_BP', 'RF_BP']

# segment the data and labels
segmenter = SegmentX(100, 0.5)
X_new, y_new, _ = segmenter.fit_transform(X, y)

###################################################################################################

# create a pipeline for LDA transformation of the feature representation
est = Pipeline([('features', FeatureRep()),
                ('lda', LinearDiscriminantAnalysis(n_components=2))])
pipe = SegPipe(est)

# plot embedding
X2, y2 = pipe.fit_transform(X_new, y_new)
plot_embedding(X2, y2.astype(int), labels)
plt.show()

###################################################################################################

# create a pipeline for feature representation
est = Pipeline([('features', FeatureRep()),
                ('scaler', StandardScaler()),
                ('rf', RandomForestClassifier())])
pipe = SegPipe(est)

# split the data
X_train, X_test, y_train, y_test = train_test_split(X_new,
    model.compile(loss='categorical_crossentropy', optimizer='adam',
                  metrics=['accuracy'])
    return model


# load the data
data = load_watch()
X = data['X']
y = data['y']

# create a segment learning pipeline
width = 100
est = KerasClassifier(build_fn=crnn_model, epochs=10, batch_size=256, verbose=0)
pipe = SegPipe(est)

# split the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

pipe.fit(X_train, y_train)
score = pipe.score(X_test, y_test)

print("N series in train: ", len(X_train))
print("N series in test: ", len(X_test))
print("N segments in train: ", pipe.N_train)
print("N segments in test: ", pipe.N_test)
print("Accuracy score: ", score)

img = mpimg.imread('segments.jpg')
plt.tight_layout()

##############################################
# SETUP
##############################################

# load the data
data = load_watch()
X = data['X']
y = data['y']

# create a feature representation pipeline
est = Pipeline([('features', FeatureRep()),
                ('scaler', StandardScaler()),
                ('rf', RandomForestClassifier())])
pipe = SegPipe(est)

# split the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

##############################################
# OPTION 1: Use the SegPipe score method
##############################################

pipe.fit(X_train, y_train)
score = pipe.score(X_test, y_test)
print("Accuracy score: ", score)
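# A hedged aside, not part of the original example: SegPipe.predict returns the
# segment-aligned targets together with the predictions (the forecasting example
# further below relies on this), so any sklearn metric can be computed on them
# directly. The weighted F1 score below is only an illustration.
from sklearn.metrics import f1_score

y_true_seg, y_pred_seg = pipe.predict(X_test, y_test)
print("F1 score (weighted): ", f1_score(y_true_seg, y_pred_seg, average='weighted'))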
# split the data along the time axis (our only option since we have only 1 time series)
X_train, X_test, y_train, y_test = temporal_split(X, y, test_size=0.25)

# create a feature representation pipeline
est = Pipeline([('features', FeatureRep()),
                ('lin', LinearRegression())])

# setting y_func=last and forecast=200 makes us predict the value of y
# 200 samples ahead of the segment
# other reasonable options for y_func are ``mean`` and ``all``, or you can create your
# own function (as sketched below) - see the API documentation for further details
segmenter = SegmentXYForecast(width=200, overlap=0.5, y_func=last, forecast=200)
pipe = SegPipe(est, segmenter)

# fit and score
pipe.fit(X_train, y_train)
score = pipe.score(X_test, y_test)

print("N series in train: ", len(X_train))
print("N series in test: ", len(X_test))
print("N segments in train: ", pipe.N_train)
print("N segments in test: ", pipe.N_test)
print("Score: ", score)

# generate some predictions
y, y_p = pipe.predict(X, y)                  # all predictions
ytr, ytr_p = pipe.predict(X_train, y_train)  # training predictions
yte, yte_p = pipe.predict(X_test, y_test)    # test predictions
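# A hedged sketch of a custom ``y_func`` (illustrative, not part of the original
# example; assumes numpy is imported as np): y_func receives the segmented targets
# as a 2D array of shape (n_segments, width) and must reduce them to one target per
# segment, as the built-in ``last`` and ``mean`` do. ``segment_median`` is a made-up name.
def segment_median(y_seg):
    # use the median target value within each segment
    return np.median(y_seg, axis=1)

# it could then be passed in place of ``last``:
# segmenter = SegmentXYForecast(width=200, overlap=0.5, y_func=segment_median, forecast=200)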
##############################################
# Setup
##############################################

# load the data
data = load_watch()
X = data['X']
y = data['y']

# split the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

# create a segment learning pipeline
width = 100
est = KerasClassifier(build_fn=crnn_model, epochs=10, batch_size=256, verbose=0,
                      validation_split=0.2)
pipe = SegPipe(est)

##############################################
# Accessing training history
##############################################

# this is a bit of a hack, because the history object is returned by the
# keras wrapper when fit is called
# this approach won't work with a more complex estimator pipeline, in which case
# a callable class with the desired properties should be created and passed to
# build_fn (see the sketch below)

pipe.fit(X_train, y_train)
print(DataFrame(pipe.history.history))

ac_train = pipe.history.history['acc']
ac_val = pipe.history.history['val_acc']
epoch = np.arange(len(ac_train)) + 1
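# A hedged sketch of the callable-class approach mentioned above (illustrative,
# not part of the original example). A class instance passed as ``build_fn`` can
# carry whatever properties are needed; here it simply keeps a reference to the
# last model it built. It assumes ``crnn_model`` (defined earlier in this example)
# can be called with its default arguments; ``ModelHolder`` is a made-up name.
class ModelHolder(object):
    def __init__(self):
        self.model = None

    def __call__(self):
        # build the compiled keras model and keep a reference to it
        self.model = crnn_model()
        return self.model

# usage sketch:
# builder = ModelHolder()
# est = KerasClassifier(build_fn=builder, epochs=10, batch_size=256, verbose=0)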
def test_pipe_regression():
    # no context data, single time series
    X = [np.random.rand(1000, 10)]
    y = [np.random.rand(1000)]
    est = Pipeline([('ftr', FeatureRep()), ('ridge', Ridge())])
    pipe = SegPipe(est, segmenter=SegmentXY())
    pipe.fit(X, y)
    pipe.predict(X, y)
    pipe.score(X, y)

    # context data, single time series
    Xt = [np.random.rand(1000, 10)]
    Xc = [np.random.rand(3)]
    X = make_ts_data(Xt, Xc)
    y = [np.random.rand(1000)]
    pipe.fit(X, y)
    pipe.predict(X, y)
    pipe.score(X, y)

    # multiple time series
    Xt = [np.random.rand(1000, 10), np.random.rand(100, 10), np.random.rand(500, 10)]
    Xc = np.random.rand(3, 3)
    X = make_ts_data(Xt, Xc)
    y = [np.random.rand(1000), np.random.rand(100), np.random.rand(500)]
    pipe.fit(X, y)
    pipe.predict(X, y)
    pipe.score(X, y)

    # cross validation
    Xt = np.array([np.random.rand(1000, 10) for i in range(5)])
    Xc = np.random.rand(5, 3)
    X = make_ts_data(Xt, Xc)
    y = np.array([np.random.rand(1000) for i in range(5)])
    cross_validate(pipe, X, y)

    # transform pipe
    est = Pipeline([('ftr', FeatureRep()), ('scaler', StandardScaler())])
    pipe = SegPipe(est, segmenter=SegmentXY())
    Xt = [np.random.rand(1000, 10), np.random.rand(100, 10), np.random.rand(500, 10)]
    Xc = np.random.rand(3, 3)
    X = make_ts_data(Xt, Xc)
    y = [np.random.rand(1000), np.random.rand(100), np.random.rand(500)]
    pipe.fit(X, y)
    pipe.transform(X, y)
    pipe.fit_transform(X, y)
def test_pipe_classification():
    # no context data, single time series
    X = [np.random.rand(1000, 10)]
    y = [5]
    est = Pipeline([('ftr', FeatureRep()), ('rf', RandomForestClassifier())])
    pipe = SegPipe(est, segmenter=SegmentX())
    pipe.fit(X, y)
    pipe.predict(X, y)
    pipe.score(X, y)

    # context data, single time series
    Xt = [np.random.rand(1000, 10)]
    Xc = [np.random.rand(3)]
    X = make_ts_data(Xt, Xc)
    y = [5]
    pipe.fit(X, y)
    pipe.predict(X, y)
    pipe.score(X, y)

    # multiple time series
    Xt = [np.random.rand(1000, 10), np.random.rand(100, 10), np.random.rand(500, 10)]
    Xc = np.random.rand(3, 3)
    X = make_ts_data(Xt, Xc)
    y = [1, 2, 3]
    pipe.fit(X, y)
    pipe.predict(X, y)
    pipe.score(X, y)

    # univariate data
    Xt = [np.random.rand(1000), np.random.rand(100), np.random.rand(500)]
    Xc = np.random.rand(3)
    X = make_ts_data(Xt, Xc)
    y = [1, 2, 3]
    pipe.fit(X, y)
    pipe.predict(X, y)
    pipe.score(X, y)

    # transform pipe
    est = Pipeline([('ftr', FeatureRep()), ('scaler', StandardScaler())])
    pipe = SegPipe(est, segmenter=SegmentX())
    Xt = [np.random.rand(1000, 10), np.random.rand(100, 10), np.random.rand(500, 10)]
    Xc = np.random.rand(3, 3)
    X = make_ts_data(Xt, Xc)
    y = [1, 2, 3]
    pipe.fit(X, y)
    pipe.transform(X, y)
    pipe.fit_transform(X, y)
    return model


# load the data
data = load_watch()
X = data['X']
y = data['y']

# temporal splitting of the data
splitter = TemporalKFold(n_splits=3)
Xs, ys, cv = splitter.split(X, y)

# create a segment learning pipeline
width = 100
est = KerasClassifier(build_fn=crnn_model, epochs=5, batch_size=256, verbose=0)
pipe = SegPipe(est)

# create a parameter dictionary using the SegPipe API - which is similar to the sklearn API
#
# parameters passed to an estimator in the ``feed`` pipeline are keyed ``f$estimator__parameter``
# parameters passed to an estimator in the ``est`` pipeline are keyed ``e$estimator__parameter``
#
# when the ``feed`` or ``est`` pipeline is not a pipeline, but just a single estimator,
# the parameter is keyed ``f$parameter`` or ``e$parameter`` respectively
#
# you can also keep a parameter always equal to another parameter by setting its value to
# the name of the parameter to track
#
# note that even if you want to try only a single value for a parameter, it still needs to
# be given as a list
par_grid = {'s$width': [50, 100, 200],