def auto_feature_engineering(
    input_df: pd.DataFrame,
    cols: List[str],
    window_size: int = 5,
    fs: float = 50,
) -> pd.DataFrame:
    """
    Automated feature engineering wrapper using TSFEL package for features
    generated based on temporal metrics.

    Features are extracted independently for every column named in ``cols``
    and the per-column results are concatenated side by side.

    :input_df: Input data
    :cols: Names of the columns of ``input_df`` to extract features from
    :window_size: Size of the window
    :fs: Sampling frequency handed to TSFEL (defaults to the previously
        hard-coded value of 50)
    :return: X data - one group of feature columns per entry of ``cols``,
        each group prefixed with the column name; an empty DataFrame when
        ``cols`` is empty
    """
    # pd.concat raises on an empty list, so short-circuit when there is
    # nothing to extract.
    if len(cols) == 0:
        return pd.DataFrame()

    cfg_file = tsfel.get_features_by_domain("temporal")

    X_df_list = []
    for col in cols:
        # Extract from this column only. Passing the whole frame here would
        # yield the same features on every iteration, differing only by the
        # prefix added below.
        current_df = tsfel.time_series_features_extractor(
            cfg_file,
            input_df[col],
            fs=fs,
            window_splitter=True,
            window_size=window_size,
        )
        X_df_list.append(current_df.add_prefix(col))

    return pd.concat(X_df_list, axis=1)
def get_last_window(self, mfe_features=None, tsfel_config=None,
                    features_summaries=None, n_classes=None,
                    delta_acc_summary_func=None):
    """Compute the features of the accumulated history window and reset it.

    Every sample found in element 0 of each ``self.history`` entry is
    gathered into one flat list and handed to ``get_window_features``.
    When ``delta_acc_summary_func`` is given, element 2 of each history
    entry is summarised with ``summary_funcs[delta_acc_summary_func]``; the
    result is passed on as ``current_acc`` together with the previously
    stored ``self.last_window_acc``, which is then replaced by the new value.

    :param mfe_features: defaults to
        ``["nr_class", "attr_ent", "kurtosis", "skewness"]``
    :param tsfel_config: defaults to ``tsfel.get_features_by_domain()``
    :param features_summaries: defaults to ``["max", "min", "mean", "var"]``
    :param n_classes: forwarded unchanged to ``get_window_features``
    :param delta_acc_summary_func: key into ``summary_funcs`` or ``None``
    :return: result of ``get_window_features``, or ``None`` when
        ``self.history`` is ``None``
    """
    # Defaults are resolved before the history check, as before.
    if features_summaries is None:
        features_summaries = ["max", "min", "mean", "var"]
    if mfe_features is None:
        mfe_features = ["nr_class", "attr_ent", "kurtosis", "skewness"]
    if tsfel_config is None:
        tsfel_config = tsfel.get_features_by_domain()

    if self.history is None:
        return None

    # Flatten the samples of all history entries into a single window.
    window_samples = []
    for entry in self.history:
        for sample in entry[0]:
            window_samples.append(sample)

    if delta_acc_summary_func is None:
        last_window_acc = None
        current_acc = None
    else:
        summarise = summary_funcs[delta_acc_summary_func]
        current_acc = summarise([entry[2] for entry in self.history])
        # Hand out the previous summary, then remember the new one.
        last_window_acc = self.last_window_acc
        self.last_window_acc = current_acc

    window_features = get_window_features(
        window_samples,
        mfe_features,
        tsfel_config,
        features_summaries,
        n_classes=n_classes,
        last_window_acc=last_window_acc,
        current_acc=current_acc,
    )
    # Start a fresh window for the next call.
    self.history = []
    return window_features
def tsfel_calculator(x):
    """Run TSFEL feature extraction on ``x`` with the default configuration.

    ``tsfel`` is imported lazily inside the function, so the dependency is
    only required when the calculator is actually called.

    :param x: signal passed straight to
        ``tsfel.time_series_features_extractor``
    :return: whatever the TSFEL extractor returns for ``x``
    """
    import tsfel

    # No domain argument: use the configuration TSFEL returns by default.
    feature_config = tsfel.get_features_by_domain()
    return tsfel.time_series_features_extractor(feature_config, x)
import pandas as pd

# Pull the complete 'display' table out of the local SQLite database and
# work on a copy so the raw frame stays untouched.
df = pd.read_sql_table('display', 'sqlite:///dissertation.db')
df_engineering = df.copy()
#df_engineering['id']=np.repeat(range(1,4033),360)

# First 1,209,600 rows only (20 weeks of readings).
df_20weeks = df_engineering.iloc[:1209600]
#df_tsfresh=df_engineering[['time','id','kWh']]

# Single-column frames fed to TSFEL: raw kWh, and the same values in Wh.
df_tsfel = df_20weeks[['kWh']]
df_hour = (df_20weeks[['kWh']] * 1000).rename(columns={'kWh': 'Wh'})

import tsfel
#from tsfresh import extract_relevant_features

cfg = tsfel.get_features_by_domain()

# Daily windows: 8640 samples per window (one day).
X_train = tsfel.time_series_features_extractor(
    cfg,
    df_tsfel,
    window_splitter=True,
    window_size=8640,
)
print(X_train.columns.tolist())

# Keep only the hand-picked subset of the extracted features.
X_train = X_train.loc[:, [
    '0_Absolute energy',
    '0_Mean',
    '0_Max',
    '0_Standard deviation',
    '0_FFT mean coefficient_0',
    '0_Spectral kurtosis',
    '0_Skewness',
    '0_Zero crossing rate',
]]

# Hourly windows: 360 samples per window (one hour).
X_hour = tsfel.time_series_features_extractor(
    cfg,
    df_hour,
    window_splitter=True,
    window_size=360,
)
k for k in df.columns if 'Average' in k and 'huser' not in k and 'lejl' not in k)] #minmaxscaling scaler = MinMaxScaler(feature_range=(0, 1)) df[categories] = scaler.fit_transform(df[categories]) X_train = tsfel.time_series_features_extractor(cfg_file, serie, fs=24, window_splitter=True, window_size=720) #serie= #X_train = tsfel.time_series_features_extractor(cfg_file, serie, fs=24, window_splitter=True, window_size=720) corr_features = tsfel.correlated_features(X_train) list = [] cfg_file = tsfel.get_features_by_domain() for category in categories: serie = df[category].dropna() X_train = tsfel.time_series_features_extractor(cfg_file, serie, fs=24, window_splitter=True, window_size=720) X_train.drop(corr_features, axis=1, inplace=True) #X_train=scaler.fit_transform(X_train) list.append(X_train) mo = [] for m in np.arange(12): data = pd.DataFrame() for i in np.arange(len(list)):
def makeDataset(self, signals=False,strategy="ad-hoc"):
    """Build the feature matrix, labels and file ids for all listed files.

    Each row of ``self.files_list`` names a CSV file (``row["cycleName"]``)
    that is read from ``self.fpathCameoFiles``, split into chunks with
    ``self.makeChunks`` and turned into one feature set per chunk according
    to ``strategy``.

    :param signals: passed to ``pd.read_csv`` as ``usecols``; the ad-hoc
        path also appends signal names to it.
        NOTE(review): the default ``False`` has no ``append`` method, so the
        ad-hoc path needs a list here - confirm the callers always pass one.
    :param strategy: ``"ad-hoc"``, a list of feature specs, ``"tsfel-all"``,
        ``"tsfel-all-corr"``, ``"tsfel-statistical"``, ``"tsfel-temporal"``
        or ``"vest"``. Any other value leaves the per-chunk result as an
        empty list.
    :return: tuple ``(np.array(X), np.array(Y), np.array(Id))`` holding the
        per-chunk features, ``row['label']`` and the file name of each chunk.

    NOTE(review): the nesting below was reconstructed from a source whose
    indentation had been lost; confirm that the ``X``/``Id``/``Y`` appends
    are meant to run once per chunk.
    """
    def adhoc(strategy):
        # Computes the hand-specified features for the current `chunk` and
        # appends them to `w`; both names come from the enclosing loop.
        self.features = strategy
        if(strategy=="ad-hoc"):
            self.features = self.signalsToFeatures(signals)
        featuresSpec = []
        for var in self.features:
            #print (var)
            # A spec is a space-separated string: element 0 is the signal
            # name, element 1 the feature type, element 2 a derivative flag.
            x = var.split(" ")
            if len(x) == 2:
                # Two parts: no derivative requested.
                x.append(False)
            else:
                # Otherwise the third part is replaced by True (derivative).
                x[2] = True
            featuresSpec.append(x)
            # NOTE(review): this runs on every adhoc() call, so `signals`
            # grows with repeated names for each chunk - confirm intended.
            signals.append(x[0])
        for feature in featuresSpec:
            var = feature[0]
            isDerivate = feature[2]
            featureType = feature[1]
            if(isDerivate == True):
                # Derivative feature: work on the first difference.
                temp = ""
                if(self.includeFiltering == True):
                    temp = self.filterOutliers(chunk[var].diff(), self.n)
                else:
                    temp = chunk[var].diff()
                # "mean" / "std", anything else is read as an integer
                # percentile; all three ignore NaNs.
                if featureType == "mean":
                    w.append(np.nanmean(temp))
                elif featureType == "std":
                    w.append(np.nanstd(temp))
                else:
                    w.append(np.nanpercentile(temp,int(featureType)))
            else:
                # Plain feature: same aggregation on the raw signal.
                temp = ""
                if(self.includeFiltering == True):
                    temp = self.filterOutliers(chunk[var], self.n)
                else:
                    temp = chunk[var]
                if featureType == "mean":
                    w.append(np.nanmean(temp))
                elif featureType == "std":
                    w.append(np.nanstd(temp))
                else:
                    w.append(np.nanpercentile(temp,int(featureType)))
        return w
    Id = []
    X = []
    Y = []
    for j, row in self.files_list.iterrows():
        file = row["cycleName"]
        df = pd.read_csv(self.fpathCameoFiles + file, encoding ="latin1", usecols = signals)
        # Chunk count argument: number of rows divided by
        # (self.minuteWindow * 60), truncated to an int.
        chunks = self.makeChunks(df, int(df.shape[0]/(self.minuteWindow*60)))
        for chunk in chunks:
            # Back-fill missing values before extracting features.
            chunk = chunk.fillna(method='bfill')
            #return chunk,"a","a"
            w = []
            if(strategy=="ad-hoc" or type(strategy) == list):
                w = adhoc(strategy)
            elif(strategy=="tsfel-all" or strategy=="tsfel-all-corr"):
                # Default TSFEL configuration (no domain restriction).
                cfg = tsfel.get_features_by_domain()
                w = tsfel.time_series_features_extractor(cfg, chunk)
                self.features = list(w.columns)
            elif(strategy=="tsfel-statistical"):
                cfg = tsfel.get_features_by_domain('statistical')
                w = tsfel.time_series_features_extractor(cfg, chunk)
                self.features = list(w.columns)
            elif(strategy=="tsfel-temporal"):
                cfg = tsfel.get_features_by_domain('temporal')
                w = tsfel.time_series_features_extractor(cfg, chunk)
                self.features = list(w.columns)
            elif(strategy=="vest"):
                # NOTE(review): both calls below receive the whole file `df`
                # rather than the current `chunk`, and `features` is never
                # used afterwards - confirm this is intended.
                model = BivariateVEST()
                features = model.extract_features(df, pairwise_transformations=False, summary_operators=SUMMARY_OPERATIONS_SMALL)
                w = multivariate_feature_extraction(df, apply_transform_operators=False, summary_operators=SUMMARY_OPERATIONS_SMALL)
                self.features = list(w.columns)
            X.append(w)
            Id.append(file)
            Y.append(row['label'])
    if(strategy=="tsfel-all-corr"):
        # Stack all per-chunk frames and drop the columns TSFEL reports as
        # correlated; X becomes the resulting DataFrame.
        dataset = pd.concat(X)
        to_drop = tsfel.correlated_features(dataset)
        dataset = dataset.drop(to_drop,axis=1)
        self.features = dataset.columns
        X = dataset
    return np.array(X), np.array(Y), np.array(Id)
import tsfel

# Location of the feature catalogue inside the installed tsfel package.
FEATURES_JSON = f"{tsfel.__path__[0]}/feature_extraction/features.json"

# Variant 0: configuration loaded directly from that JSON file.
settings0 = tsfel.load_json(FEATURES_JSON)

# Variants 1-4: configurations requested per domain; `None` asks for the
# configuration without a domain restriction.
settings1, settings2, settings3, settings4 = (
    tsfel.get_features_by_domain(domain_name)
    for domain_name in ('statistical', 'temporal', 'spectral', None)
)

# Variant 5: configuration taken from the sheet called 'Features'.
# NOTE(review): presumably this needs access to an external spreadsheet -
# confirm before running offline.
settings5 = tsfel.extract_sheet('Features')
import sklearn.metrics as metrics
from sklearn.cluster import KMeans
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import silhouette_score
import pandas as pd
import tsfel

# IMPORT
# The first two lines of the CSV are skipped; READ_TIME_Date becomes the
# index and is parsed into datetimes.
df = pd.read_csv('Data kategorier.csv', skiprows=2, index_col=['READ_TIME_Date'])
df.index = pd.to_datetime(df.index)

# Houses class
# Column at position 7, with missing readings removed.
serie = df.iloc[:, 7].dropna()
cfg_file = tsfel.get_features_by_domain(domain='statistical')
X_train = tsfel.time_series_features_extractor(cfg_file, serie, fs=24, window_splitter=True, window_size=720)

# Remove corr features
corr_features = tsfel.correlated_features(X_train)
X_train.drop(corr_features, axis=1, inplace=True)

# Label each window with a month name and use that label as the index.
# NOTE(review): this assignment requires exactly 12 windows in X_train;
# pandas raises a length-mismatch error otherwise.
X_train['months'] = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
X_train.index = X_train['months']
# DataFrame.drop returns a new frame unless inplace=True. The result was
# previously discarded, leaving the string 'months' column among the
# numeric features.
X_train = X_train.drop(['months'], axis=1)

# NOTE(review): cfg_file is restricted to the 'statistical' domain and
# correlated columns were dropped above - confirm that both columns below
# actually exist in X_train, otherwise these lookups raise KeyError.
Kurtosis = X_train['0_Spectral kurtosis']
#SignifFeatKurtosis=X_train['0_Spectral kurtosis']
Skewness = X_train['0_Skewness']
from sklearn.decomposition import PCA pl.rc('text', usetex=True) pl.rc('font', family='serif', serif='Times') #%% #Don't forget to change this PATH path = 'csv/' dataframeempty = pd.DataFrame() W = [] for csv_path in glob(path + 'Residential_*.csv'): df = pd.read_csv(csv_path) df.dropna(inplace=True) df.drop(['date'], axis=1, inplace=True) # Retrieves a pre-defined feature configuration file to extract all available features cfg = tsfel.get_features_by_domain() # Extract features X = tsfel.time_series_features_extractor(cfg, df) X['File Name'] = csv_path dataframeempty = dataframeempty.append(X) cfgx = tsfel.get_features_by_domain(domain="spectral") cfgs = tsfel.get_features_by_domain(domain="statistical") cfgt = tsfel.get_features_by_domain(domain="temporal") x = tsfel.time_series_features_extractor(cfg, df, verbose=0) xx = tsfel.time_series_features_extractor(cfgx, df, verbose=0) xs = tsfel.time_series_features_extractor(cfgs, df, verbose=0) xt = tsfel.time_series_features_extractor(cfgt, df, verbose=0)