class ClassificationPipeline(Pipeline):
    """Classification pipeline that wraps an ``AutoSklearnClassifier``."""

    def __init__(self, **pipeline_constructor_params):
        """Build the wrapped estimator.

        Args:
            **pipeline_constructor_params: Keyword arguments passed through
                unchanged to ``AutoSklearnClassifier``.
        """
        estimator = AutoSklearnClassifier(**pipeline_constructor_params)
        self.estimator = estimator

    def fit(self, x, y):
        """Fit the wrapped estimator on features ``x`` and targets ``y``."""
        self.estimator.fit(x, y)

    def run(self, x):
        """Return the wrapped estimator's predictions for ``x``."""
        predictions = self.estimator.predict(x)
        return predictions

    def as_json(self):
        """Return the configuration dictionary of the first listed model.

        Takes the first entry returned by
        ``estimator.get_models_with_weights()`` and uses its second element
        as the model (presumably entries are ``(weight, model)`` pairs --
        confirm against the installed auto-sklearn version).
        """
        weighted_models = self.estimator.get_models_with_weights()
        first_entry = weighted_models[0]
        first_model = first_entry[1]
        return first_model.config.get_dictionary()
def fit_automl(self, run_time):
    """Fit auto-sklearn on the most recent dataset and report the results.

    Side effects:
        - Starts a thread running ``self.update_progress`` with
          ``self.progress_widget`` as its argument.
        - Appends a deep copy of the fitted model to ``self.models``.
        - Prints a completion message, the accuracy summary and the model
          list into the event, metrics and model output widgets.
        - Re-enables the upload controls: the upload button and text box
          when ``self.textbox_upload`` is truthy, otherwise
          ``self.upload_widget``.
        - Calls ``self.on_budget_completion()`` when ``self.queries``
          equals ``self.budget_widget.value``.

    Args:
        run_time (int): The run time for auto-sklearn in seconds.

    Returns:
        automl (AutoSklearnClassifier): fitted auto-sklearn model.
    """
    # TODO functionality to load the metadata directory from Mongo
    classifier = AutoSklearnClassifier(
        time_left_for_this_task=run_time,
        metadata_directory=".metalearning/metalearning_files/",
    )

    progress_thread = threading.Thread(
        target=self.update_progress,
        args=(self.progress_widget, ),
    )
    progress_thread.start()

    # Work on a copy so the stored dataset is not mutated by ``pop``.
    latest = self.data[-1].copy()
    # The item keyed 0 is removed from the copy and used as the target.
    targets = latest.pop(0)
    features, feat_types, _ = model_utils.process_feat_types(latest)

    features_train = features.iloc[self.train_idxs]
    targets_train = targets.iloc[self.train_idxs]
    features_test = features.iloc[self.test_idxs]
    targets_test = targets.iloc[self.test_idxs]

    # Silence warnings and anything the fit prints.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        with HiddenPrints():
            classifier.fit(features_train, targets_train,
                           feat_type=feat_types)

    # Fitting is done: keep an independent snapshot of the model.
    self.models.append(copy.deepcopy(classifier))

    with self.event_output_widget:
        print("FITTING COMPLETED WITH FITTING TIME PARAMETER AS ",
              int(run_time / 60), " MINUTES")

    with self.metrics_output_widget:
        train_accuracy = metrics.accuracy_score(
            targets_train, classifier.predict(features_train))
        test_accuracy = metrics.accuracy_score(
            targets_test, classifier.predict(features_test))
        noised_test_accuracy = model_utils.thresholdout(
            train_accuracy, test_accuracy)
        summary = "Run {}: train acc: {:.4}, noised test acc: {:.4}\n".format(
            self.queries, train_accuracy, noised_test_accuracy)
        print(summary)

    with self.model_output_widget:
        print("MODELS:")
        print(classifier.get_models_with_weights())

    # Allow the next upload now that this run has been reported.
    if self.textbox_upload:
        self.upload_button.disabled = False
        self.upload_text.disabled = False
    else:
        self.upload_widget.disabled = False

    if self.queries == self.budget_widget.value:
        self.on_budget_completion()

    return classifier
# Pull the raw array out of the dataframe: every column but the last is an
# input feature, the last column is the class label.
data = dataframe.values

# Minimal preparation: float32 features, labels encoded from their string form.
X = data[:, :-1].astype('float32')
y = LabelEncoder().fit_transform(data[:, -1].astype('str'))

# Hold out 33% of the rows for testing (fixed seed for repeatability).
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=1,
)

# Configure the search: 10 minutes overall, 45 seconds per run, 6 jobs.
model = AutoSklearnClassifier(
    time_left_for_this_task=10 * 60,
    per_run_time_limit=45,
    n_jobs=6,
)

# Run the search on the training split.
model.fit(X_train, y_train)

# Summary statistics of the search.
print(model.sprint_statistics())

# Each entry returned by get_models_with_weights(), one per line.
model_weights = model.get_models_with_weights()
for model_weight in model_weights:
    print(model_weight)

print("Show models")
models_def = model.show_models()
print(models_def)

# Score the fitted model on the held-out split.
y_hat = model.predict(X_test)
acc = accuracy_score(y_test, y_hat)
print("Test Dataset Accuracy: %.3f" % acc)