('cat', Pipeline(steps=[('b', OrdinalEncoder())]), categorical_features)]).fit_transform(data) encoded = pd.DataFrame(encoded, columns=reorder_features) return encoded data = pd.read_csv( '../../results/pipeline/features_rebalance/meta_learner/ts_all.csv') columns = data.columns data = SimpleImputer(strategy="constant", fill_value=0.0).fit_transform(data) data = pd.DataFrame(data=data, columns=columns) data = encode_data(data) X, y = data.drop(columns=['class']), data['class'] print(data) estimator = autosklearn.classification.AutoSklearnClassifier() # scores = cross_validate(estimator, # X, # encode_data(pd.DataFrame(y)), # scoring=["accuracy", "balanced_accuracy", "precision_macro", "recall_macro", "f1_macro"], # cv = 10, # return_train_score=True, # return_estimator=True) # results = {'train': { # 'accuracy': (scores['train_accuracy'].mean(), scores['train_accuracy'].std() * 2), # 'balanced_accuracy': (scores['train_balanced_accuracy'].mean(), scores['train_balanced_accuracy'].std() * 2), # 'precision_macro': (scores['train_precision_macro'].mean(), scores['train_precision_macro'].std() * 2),
def transform(self, X, y=None):
    """Prepare the feature matrix and, when given, the target vector.

    Steps, in order: add engineered features, optionally select a subset
    of columns, drop or impute rows of ``X`` containing missing values,
    then (only if ``y`` is supplied) build the target frame, drop or
    impute its missing values, and pick the configured target.

    Parameters
    ----------
    X : pandas.DataFrame
        Raw feature data; passed through ``self.add_features`` first.
    y : pandas.Series or pandas.DataFrame, optional
        Target data holding ``"Half Life (Seconds)"``. When the X strategy
        is ``'drop'`` it must share ``X``'s index labels, because the rows
        removed from ``X`` are removed from ``y`` by label.

    Returns
    -------
    pandas.DataFrame
        The transformed ``X`` alone when ``y`` is None.
    tuple
        ``(X, y)`` otherwise. ``y`` is the ``'Half Life (Seconds)'`` column
        for ``target_vector == 'Seconds'``, the ``'Magnitude of Value'``
        column for ``'Magnitude'``, and the stability feature whenever
        ``prediction_type == 'Binary'``.
    """
    # Feature engineering
    X = self.add_features(X)

    # Feature selection. The isinstance guard keeps an array-like column
    # selection (NumPy array, pandas Index) from being compared elementwise
    # against 'all', which would make the `if` raise on an ambiguous truth
    # value.
    use_all_features = (isinstance(self.X_features, str)
                        and self.X_features == 'all')
    if not use_all_features:
        X = X[self.X_features]

    # Remembered so the columns can be restored after imputation, which
    # returns a bare array.
    X_columns = X.columns

    # Dropping / imputing missing values in X
    drop_x_nan = self.X_imputer_strat == 'drop'
    if drop_x_nan:
        # Labels of every row holding at least one NaN (vectorized).
        Xrows_with_nan = X.index[X.isnull().any(axis=1).values]
        X = X.drop(Xrows_with_nan).reset_index(drop=True)
    else:
        X = SimpleImputer(strategy=self.X_imputer_strat,
                          fill_value=self.X_fill_value).fit_transform(X)

    # Reshaping / renaming X
    X = pd.DataFrame(X, columns=X_columns)

    # Only X needs to be transformed, e.g. at prediction time.
    if y is None:
        return X

    # Drop the same rows from y so it still lines up with X.
    if drop_x_nan:
        y = y.drop(Xrows_with_nan).reset_index(drop=True)

    # Reshaping y
    y = pd.DataFrame(y, columns=["Half Life (Seconds)"]).reset_index(drop=True)

    # Creating target vectors
    y = self.add_magnitude_and_value_features(y)
    isStable = self.get_isStable_feature(y, self.magnitude_threshold,
                                         self.seconds_threshold)
    y_columns = y.columns

    # Dropping / imputing missing values in y
    if self.y_imputer_strat == 'drop':
        Yrows_with_nan = y.index[y.isnull().any(axis=1).values]
        # X and isStable lose the same rows so all three stay aligned.
        y = y.drop(Yrows_with_nan).reset_index(drop=True)
        X = X.drop(Yrows_with_nan).reset_index(drop=True)
        isStable = isStable.drop(Yrows_with_nan).reset_index(drop=True)
    else:
        y = SimpleImputer(strategy=self.y_imputer_strat,
                          fill_value=self.y_fill_value).fit_transform(y)

    # Reshaping / renaming y
    y = pd.DataFrame(y, columns=y_columns)

    # Choosing the target vector
    if self.target_vector == 'Seconds':
        y = y['Half Life (Seconds)']
    elif self.target_vector == 'Magnitude':
        y = y['Magnitude of Value']

    # A binary prediction type overrides the regression target.
    if self.prediction_type == 'Binary':
        y = isStable

    return X, y