def main():
    """Train and evaluate a tsfresh feature-extraction + random-forest pipeline.

    Downloads the robot execution failures example data, wires a
    RelevantFeatureAugmenter and a RandomForestClassifier into one sklearn
    Pipeline, fits on a train split and prints a classification report.
    """
    download_robot_execution_failures()
    df_ts, y = load_robot_execution_failures()

    # Empty design matrix carrying only the sample index; the augmenter
    # fills in the extracted features during fit/predict.
    X = pd.DataFrame(index=y.index)

    X_train, X_test, y_train, y_test = train_test_split(X, y)
    print(df_ts)

    # Feature extraction step followed by a random forest classifier.
    pipeline = Pipeline([
        ('fresh', RelevantFeatureAugmenter(column_id='id', column_sort='time')),
        ('clf', RandomForestClassifier()),
    ])

    # Due to limitations of the sklearn pipeline API the time-series frame
    # cannot be passed through fit(); it is attached via set_params instead.
    # df_ts holds the series for both train and test set — with separate
    # frames, call set_params twice (see pipeline_with_two_datasets.ipynb).
    pipeline.set_params(fresh__timeseries_container=df_ts)

    pipeline.fit(X_train, y_train)
    y_pred = pipeline.predict(X_test)

    print(classification_report(y_test, y_pred))
def setUp(self):
    """Fetch the example dataset and restrict it to the single channel 'a'."""
    download_robot_execution_failures()
    series, labels = load_robot_execution_failures()
    self.y = labels
    self.df = pd.DataFrame(index=series.id.unique())
    # Shrink the time series to one sensor channel to keep the test fast.
    self.timeseries = series[["id", "time", "a"]]
def setUp(self):
    """Fetch the example dataset and restrict it to the single channel 'F_x'."""
    download_robot_execution_failures()
    series, labels = load_robot_execution_failures()
    self.y = labels
    self.df = pd.DataFrame(index=series.id.unique())
    # Shrink the time series to one sensor channel to keep the test fast.
    self.timeseries = series[["id", "time", "F_x"]]
def setUp(self):
    """Download the example dataset into a fresh temp folder and load it."""
    self.temporary_folder = tempfile.mkdtemp()
    data_file = os.path.join(self.temporary_folder, "data")

    download_robot_execution_failures(file_name=data_file)
    series, labels = load_robot_execution_failures(file_name=data_file)

    self.y = labels
    self.df = pd.DataFrame(index=series.id.unique())
    # Shrink the time series to one sensor channel to keep the test fast.
    self.timeseries = series[["id", "time", "F_x"]]
def testLocalTSFresh(self):
    """Run tsfresh feature extraction through a locally created MarsDistributor."""
    robot_execution_failures.download_robot_execution_failures()
    df, y = robot_execution_failures.load_robot_execution_failures()

    distributor = MarsDistributor()

    # Limit to the first 200 rows to keep the test cheap.
    subset = df.iloc[:200]
    settings = ComprehensiveFCParameters()
    extract_features(
        subset,
        column_id='id',
        column_sort='time',
        default_fc_parameters=settings,
        # impute removes all NaN features automatically
        impute_function=impute,
        distributor=distributor,
    )
def test_distributed_ts_fresh(setup):
    """Run tsfresh feature extraction through a MarsDistributor bound to a sync session."""
    robot_execution_failures.download_robot_execution_failures()
    df, y = robot_execution_failures.load_robot_execution_failures()

    # Open a synchronous session against the default session's address.
    sync_session = new_session(get_default_session().address)
    distributor = MarsDistributor(session=sync_session)

    # Limit to the first 200 rows to keep the test cheap.
    subset = df.iloc[:200].copy()
    settings = ComprehensiveFCParameters()
    extract_features(
        subset,
        column_id='id',
        column_sort='time',
        default_fc_parameters=settings,
        # impute removes all NaN features automatically
        impute_function=impute,
        distributor=distributor,
    )
def testDistributedTSFresh(self):
    """Run tsfresh feature extraction through a MarsDistributor over a web session."""
    robot_execution_failures.download_robot_execution_failures()
    df, y = robot_execution_failures.load_robot_execution_failures()

    service_ep = 'http://127.0.0.1:' + self.web_port
    with new_session(service_ep) as sess:
        distributor = MarsDistributor(sess)

        # Limit to the first 200 rows to keep the test cheap.
        subset = df.iloc[:200]
        settings = ComprehensiveFCParameters()
        extract_features(
            subset,
            column_id='id',
            column_sort='time',
            default_fc_parameters=settings,
            # impute removes all NaN features automatically
            impute_function=impute,
            distributor=distributor,
        )
def test_timing():
    """Benchmark feature extraction at several series lengths; write results to <hash>.dat."""
    from tsfresh.examples.robot_execution_failures import download_robot_execution_failures, \
        load_robot_execution_failures

    download_robot_execution_failures()
    df, y = load_robot_execution_failures()

    # Tag the result file with the current git commit (quotes stripped from the hash).
    commit_hash = check_output(
        ["git", "log", "--format=\"%H\"", "-1"]
    ).decode("ascii").strip().replace("\"", "")

    lengths_to_test = [1, 5, 10, 60, 100, 400, 600, 1000, 2000]

    results = []
    for length in lengths_to_test:
        # Three repetitions per length to smooth out timing noise.
        for _ in range(3):
            results.append(test_with_length(length, df))

    results = pd.DataFrame(results)
    results.to_csv("{hash}.dat".format(hash=commit_hash))
def main():
    """Visualize the robot execution failures data, then extract and select features.

    Plots one healthy and one failure example, runs tsfresh feature
    extraction, imputes NaNs in place, and filters for features that are
    statistically relevant for predicting ``y``.
    """
    # download and load the data
    download_robot_execution_failures()
    timeseries, y = load_robot_execution_failures()

    # plot healthy example
    timeseries[timeseries['id'] == 3].plot(subplots=True, sharex=True, figsize=(10, 10))
    # plot failure example
    timeseries[timeseries['id'] == 21].plot(subplots=True, sharex=True, figsize=(10, 10))
    plt.show()

    # extract features
    extracted_features = extract_features(timeseries, column_id="id", column_sort="time")
    print('shape of extracted features: {},{}'.format(*extracted_features.shape))

    # fill NaNs based on rules (modifies the frame in place)
    impute(extracted_features)

    # filter for significant features
    features_filtered = select_features(extracted_features, y)
    print('shape of selected features: {},{}'.format(*features_filtered.shape))
    # FIX: removed leftover debugger breakpoint (`import pdb; pdb.set_trace()`)
    # that halted the script at the end of main().
import numpy as np
import pickle
import matplotlib.pyplot as plt
import random
import math
from scipy.fftpack import fft, irfft, rfft
from scipy.optimize import curve_fit
import tsfresh.feature_extraction.feature_calculators as ts
from tsfresh.examples.robot_execution_failures import download_robot_execution_failures, load_robot_execution_failures
from tsfresh import extract_features

download_robot_execution_failures()
timeseries, y = load_robot_execution_failures()


def normalize(signal, range=None, offset=None):
    """Min-max normalize *signal*.

    With both options left as None the result lies in [0, 1].
    ``range`` stretches it to (-range, +range); ``offset`` then shifts it.

    NOTE(review): the parameter name ``range`` shadows the builtin — kept
    unchanged for caller compatibility.
    """
    lo = np.min(signal)
    hi = np.max(signal)
    scaled = (signal - lo) / (hi - lo)
    if range is not None:
        scaled = (2 * scaled - 1) * range
    if offset is not None:
        scaled = scaled + offset
    return scaled


# with open('/Users/jiayun/PycharmProjects/D'
def setUp(self):
    """Make sure the example dataset is available locally, then load (X, y)."""
    download_robot_execution_failures()
    data, target = load_robot_execution_failures()
    self.X = data
    self.y = target
# -*- coding: utf-8 -*- from tsfresh.examples.robot_execution_failures import download_robot_execution_failures, load_robot_execution_failures import matplotlib.pyplot as plt from tsfresh import extract_features from tsfresh import select_features from tsfresh import extract_relevant_features from tsfresh.utilities.dataframe_functions import impute import pandas as pd import numpy as np from classifiers.base_classification import Base_Classification from sklearn.model_selection import train_test_split from sklearn.metrics import accuracy_score ### First test, delet after real test work ### ''' download_robot_execution_failures() timeseries, y = load_robot_execution_failures() print(timeseries.head()) timeseries[timeseries['id'] == 3].plot(subplots=True, sharex=True, figsize=(10,10)) plt.show() extracted_features = extract_features(timeseries, column_id="id", column_sort="time") impute(extracted_features) features_filtered = select_features(extracted_features, y) ''' ### Real Work ### # First load hmp data in extratrees_classification.py dataframe_1 = hmp.data_with_window["f1"]["training"] dataframe_2 = pd.DataFrame()
def setUp(self):
    # Fetch the example dataset if missing, then store it as (X, y) fixtures.
    download_robot_execution_failures()
    features, labels = load_robot_execution_failures()
    self.X, self.y = features, labels