Exemplo n.º 1
0
# create a feature representation pipeline
# setting y_func = last, and forecast = 200 makes us predict the value of y
# 200 samples ahead of the segment
# other reasonable options for y_func are ``mean``, ``all`` (or create your own function)
# see the API documentation for further details
clf = Pype([('segment',
             SegmentXYForecast(width=200,
                               overlap=0.5,
                               y_func=last,
                               forecast=200)), ('features', FeatureRep()),
            ('lin', LinearRegression())])

# fit and score
clf.fit(X_train, y_train)
score = clf.score(X_test, y_test)

print("N series in train: ", len(X_train))
print("N series in test: ", len(X_test))
print("N segments in train: ", clf.N_train)
print("N segments in test: ", clf.N_test)
print("Score: ", score)

# generate some predictions
y, y_p = clf.transform_predict(X, y)  # all predictions
ytr, ytr_p = clf.transform_predict(X_train, y_train)  # training predictions
yte, yte_p = clf.transform_predict(X_test, y_test)  # test predictions

# note - the first few segments in the test set won't have predictions (gap)
# we plot the 'gap' for the visualization to hopefully make the situation clear
Ns = len(y)
Exemplo n.º 2
0
def test_pipe_regression():
    # no context data, single time series
    X = [np.random.rand(1000, 10)]
    y = [np.random.rand(1000)]

    pipe = Pype([('seg', SegmentXY()), ('ftr', FeatureRep()),
                 ('ridge', Ridge())])

    pipe.fit(X, y)
    pipe.transform_predict(X, y)
    pipe.predict(X)
    pipe.score(X, y)

    # context data, single time seres
    Xt = [np.random.rand(1000, 10)]
    Xc = [np.random.rand(3)]
    X = TS_Data(Xt, Xc)
    y = [np.random.rand(1000)]

    pipe.fit(X, y)
    pipe.transform_predict(X, y)
    pipe.predict(X)
    pipe.score(X, y)

    # multiple time seres
    Xt = [
        np.random.rand(1000, 10),
        np.random.rand(100, 10),
        np.random.rand(500, 10)
    ]
    Xc = np.random.rand(3, 3)
    X = TS_Data(Xt, Xc)
    y = [np.random.rand(1000), np.random.rand(100), np.random.rand(500)]

    pipe.fit(X, y)
    pipe.transform_predict(X, y)
    pipe.predict(X)
    pipe.score(X, y)

    # cross val
    Xt = np.array([np.random.rand(1000, 10)] * 5)
    Xc = np.random.rand(5, 3)
    X = TS_Data(Xt, Xc)
    y = np.array([np.random.rand(1000)] * 5)

    cross_validate(pipe, X, y, cv=3)

    # transform pipe
    pipe = Pype([('seg', SegmentXY()), ('ftr', FeatureRep()),
                 ('scaler', StandardScaler())])

    Xt = [
        np.random.rand(1000, 10),
        np.random.rand(100, 10),
        np.random.rand(500, 10)
    ]
    Xc = np.random.rand(3, 3)
    X = TS_Data(Xt, Xc)
    y = [np.random.rand(1000), np.random.rand(100), np.random.rand(500)]

    pipe.fit(X, y)
    pipe.transform(X, y)
    pipe.fit_transform(X, y)
Exemplo n.º 3
0
# load the data
data = load_watch()
X = data['X']
y = data['y']

# create a feature representation pipeline with PadTrunc segmentation
# the time series are between 20-40 seconds
# this truncates them all to the first 5 seconds (sampling rate is 50 Hz)

pipe = Pype([('trunc', PadTrunc(width=250)), ('features', FeatureRep()),
             ('scaler', StandardScaler()), ('svc', LinearSVC())])

# split the data
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.25,
                                                    shuffle=True,
                                                    random_state=42)

pipe.fit(X_train, y_train)
score = pipe.score(X_test, y_test)

print("N series in train: ", len(X_train))
print("N series in test: ", len(X_test))
print("N segments in train: ", pipe.N_train)
print("N segments in test: ", pipe.N_test)
print("Accuracy score: ", score)

img = mpimg.imread('trunk.jpg')
plt.imshow(img)
Exemplo n.º 4
0
def test_pipe_PadTrunc():
    # no context data, single time series
    X = [np.random.rand(1000, 10)]
    y = [5]

    pipe = Pype([('trunc', PadTrunc()), ('ftr', FeatureRep()),
                 ('rf', RandomForestClassifier(n_estimators=10))])

    pipe.fit(X, y)
    pipe.transform_predict(X, y)
    pipe.predict(X)
    pipe.score(X, y)

    # context data, single time seres
    Xt = [np.random.rand(1000, 10)]
    Xc = [np.random.rand(3)]
    X = TS_Data(Xt, Xc)
    y = [5]

    pipe.fit(X, y)
    pipe.transform_predict(X, y)
    pipe.predict(X)
    pipe.score(X, y)

    # multiple time series
    Xt = [
        np.random.rand(1000, 10),
        np.random.rand(100, 10),
        np.random.rand(500, 10)
    ]
    Xc = np.random.rand(3, 3)
    X = TS_Data(Xt, Xc)
    y = [1, 2, 3]

    pipe.fit(X, y)
    pipe.transform_predict(X, y)
    pipe.predict(X)
    pipe.score(X, y)

    # univariate data
    Xt = [np.random.rand(1000), np.random.rand(100), np.random.rand(500)]
    Xc = np.random.rand(3)
    X = TS_Data(Xt, Xc)
    y = [1, 2, 3]

    pipe.fit(X, y)
    pipe.transform_predict(X, y)
    pipe.predict(X)
    pipe.score(X, y)

    # transform pipe
    pipe = Pype([('trunc', PadTrunc()), ('ftr', FeatureRep()),
                 ('scaler', StandardScaler())])

    Xt = [
        np.random.rand(1000, 10),
        np.random.rand(100, 10),
        np.random.rand(500, 10)
    ]
    Xc = np.random.rand(3, 3)
    X = TS_Data(Xt, Xc)
    y = [1, 2, 3]

    pipe.fit(X, y)
    pipe.transform(X, y)
    pipe.fit_transform(X, y)