def __init__(self, problem=None, database="featurehub"): """Create the ORMManager and connect to DB. If problem name is given, load it. Parameters ---------- problem : str, optional (default=None) Name of problem database : str, optional (default="featurehub") Name of database within DBMS. """ self.__orm = ORMManager(database) if not database_exists(self.__orm.engine.url): print("database {} does not seem to exist.".format(database)) print("You might want to create it by calling set_up method") elif problem: try: with self.__orm.session_scope() as session: problem = session.query(Problem)\ .filter(Problem.name == problem).one() self.__problemid = problem.id except NoResultFound: print("WARNING: Problem {} does not exist!".format(problem)) print("You might want to create it by calling create_problem" " method")
def log_evaluation_attempt(user): """Log user evaluation of feature. Extracts 'database', 'problem_id', and 'code' from POST body. """ try: try: database = request.form["database"] problem_id = request.form["problem_id"] code = request.form["code"] except Exception: app.logger.exception("Couldn't read parameters from form.") app.logger.debug("Read parameters from form.") try: user_name = user["name"] orm = ORMManager(database, admin=True) with orm.session_scope() as session: user_obj = session.query(User).filter( User.name == user_name).one() problem_obj = session.query(Problem).filter( Problem.id == problem_id).one() evaluation_attempt_obj = EvaluationAttempt(user=user_obj, problem=problem_obj, code=code) session.add(evaluation_attempt_obj) except Exception: app.logger.exception("Couldn't insert evaluation attempt into " "database.") app.logger.debug("Inserted evaluation attempt into database.") finally: return Response()
def __init__(self, database="featurehub"): """Create the ORMManager and connect to DB. Parameters ---------- database : str, optional (default="featurehub") Name of database within DBMS. """ self.__orm = ORMManager(database) if not database_exists(self.__orm.engine.url): print("Database {} does not exist.".format(database), file=sys.stderr) print("You might want to create it by calling set_up method", file=sys.stderr)
def create_user(user): """Create User object in the database. """ user_name = user["name"] try: database = request.form["database"] orm = ORMManager(database, admin=True) with orm.session_scope() as session: session.add(User(name=user_name)) except Exception: app.logger.exception( "Couldn't create new user (name '{}') in db".format(user_name)) return Response(status=500) app.logger.debug("Created new user (name '{}') in db".format(user_name)) return Response(status=201)
def __init__(self, problem, database="featurehub"): self.__database = database self.__orm = ORMManager(database) self.__username = None with self.__orm.session_scope() as session: try: problem = session.query(Problem)\ .filter(Problem.name == problem)\ .one() self.__problem_id = problem.id except NoResultFound: raise ValueError("Invalid problem name: {}".format(problem)) except MultipleResultsFound: raise ValueError("Unexpected issue talking to database. " + TRY_AGAIN_LATER) # "log in" to the system self._login() # initialize evaluation client self.__evaluation_client = EvaluatorClient(self.__problem_id, self.__username, self.__orm)
def submit(user): """Process user request to submit feature. Extracts 'database', 'problem_id', 'code', and 'description' from POST body. """ try: database = request.form["database"] problem_id = request.form["problem_id"] feature_dill = request.form["feature_dill"] code = request.form["code"] description = request.form["description"] except Exception: app.logger.exception("Couldn't read parameters from form.") return EvaluationResponse( status_code=EvaluationResponse.STATUS_CODE_BAD_REQUEST) app.logger.debug("Read parameters from form.") # preprocessing # - look up the problem in the databasse # - look up the user in the database # - compute the md5 hash of the feature code # - convert the feature code into a function orm = ORMManager(database, admin=True) with orm.session_scope() as session: try: problem_obj = session.query(Problem)\ .filter(Problem.id == problem_id).one() except (NoResultFound, MultipleResultsFound) as e: app.logger.exception( "Couldn't access problem (id '{}') from db".format(problem_id)) return EvaluationResponse( status_code=EvaluationResponse.STATUS_CODE_BAD_REQUEST) except Exception: app.logger.exception( "Unexpected issue accessing problem (id '{}') from db".format( problem_id)) return EvaluationResponse( status_code=EvaluationResponse.STATUS_CODE_SERVER_ERROR) app.logger.debug( "Accessed problem (id '{}') from db".format(problem_id)) user_name = user["name"] try: user_obj = session.query(User).filter(User.name == user_name).one() except (NoResultFound, MultipleResultsFound) as e: app.logger.exception( "Couldn't access user (name '{}') from db".format(user_name)) return EvaluationResponse( status_code=EvaluationResponse.STATUS_CODE_BAD_REQUEST) app.logger.debug("Accessed user (name '{}') from db".format(user_name)) md5 = myhash(code) app.logger.debug("Computed feature hash.") evaluator = EvaluatorServer(problem_id, user_name, orm) try: is_registered = evaluator.check_if_registered(code) if is_registered: app.logger.debug("feature already registered.") return EvaluationResponse(status_code=EvaluationResponse. STATUS_CODE_DUPLICATE_FEATURE) except Exception: app.logger.exception("Unexpected error checking if feature is " "registered") return EvaluationResponse( status_code=EvaluationResponse.STATUS_CODE_SERVER_ERROR) app.logger.debug("Confirmed that feature is not already registered") try: feature = dill.loads(unquote_to_bytes(feature_dill)) except Exception: app.logger.exception( "Couldn't extract function (code '{}')".format(code)) return EvaluationResponse( status_code=EvaluationResponse.STATUS_CODE_BAD_FEATURE) app.logger.debug("Extracted function.") # processing # - compute the CV score # - compute any other metrics try: metrics = evaluator.evaluate(feature) # TODO expand schema except ValueError: app.logger.exception( "Couldn't evaluate feature (code '{}')".format(code)) # feature is invalid return EvaluationResponse( status_code=EvaluationResponse.STATUS_CODE_BAD_FEATURE) except Exception: app.logger.exception( "Unexpected error evaluating feature (code '{}')".format(code)) return EvaluationResponse( status_code=EvaluationResponse.STATUS_CODE_SERVER_ERROR) app.logger.debug("Evaluated feature.") try: # write to db feature_obj = Feature(description=description, feature_dill_quoted=feature_dill, code=code, md5=md5, user=user_obj, problem=problem_obj) session.add(feature_obj) for metric in metrics: metric_db = metric.convert(kind="db") metric_obj = Metric(feature=feature_obj, name=metric_db["name"], scoring=metric_db["scoring"], value=metric_db["value"]) session.add(metric_obj) except Exception: app.logger.exception("Unexpected error inserting into db") return EvaluationResponse( status_code=EvaluationResponse.STATUS_CODE_DB_ERROR) app.logger.debug("Inserted into db.") # post to forum problem_name = problem_obj.name if is_positive_env(os.environ.get("USE_DISCOURSE")) and \ problem_name != DEMO_PROBLEM_NAME: try: topic_url = post_feature(feature_obj, metrics) app.logger.debug("Posted to forum") except Exception: topic_url = "" app.logger.exception("Unexpected error posting to forum") else: topic_url = "" # return # - status code # - metrics dict return EvaluationResponse( status_code=EvaluationResponse.STATUS_CODE_OKAY, metrics=metrics, topic_url=topic_url, )
class Commands(object): """Admin interface for the database. Create the schema, add or remove problems, and view problems, users, and features. """ def __init__(self, problem=None, database="featurehub"): """Create the ORMManager and connect to DB. If problem name is given, load it. Parameters ---------- problem : str, optional (default=None) Name of problem database : str, optional (default="featurehub") Name of database within DBMS. """ self.__orm = ORMManager(database) if not database_exists(self.__orm.engine.url): print("database {} does not seem to exist.".format(database)) print("You might want to create it by calling set_up method") elif problem: try: with self.__orm.session_scope() as session: problem = session.query(Problem)\ .filter(Problem.name == problem).one() self.__problemid = problem.id except NoResultFound: print("WARNING: Problem {} does not exist!".format(problem)) print("You might want to create it by calling create_problem" " method") def set_up(self, drop=False): """Create a new DB and create the initial scheme. If the database exists and drop=True, the existing database is dropped and recreated. Regardless, any tables defined by the schema that do not exist are created. Parameters ---------- drop : bool, optional (default=False) Drop database if it already exists. """ # todo extract database name from engine url and report for brevity engine = self.__orm.engine if database_exists(engine.url): print("Database {} already exists.".format(engine.url)) if drop: print("Dropping old database {}".format(engine.url)) drop_database(engine.url) with possibly_talking_action("Re-creating database..."): create_database(engine.url) else: with possibly_talking_action("Creating database..."): create_database(engine.url) with possibly_talking_action("Creating tables..."): Base.metadata.create_all(engine) print("Database {} created successfully".format(engine.url)) def bulk_create_problem_yml(self, path): """Create new problem entries in database from yml document stream. Can create a yml file with individual problems delimited into documents using the `---` ... `---` document stream syntax. Parameters ---------- path: str or path-like Path to yml file """ with open(path, "r") as f: obj_all = yaml.load_all(f) for obj in obj_all: self.create_problem(**obj) def create_problem_yml(self, path): """Create new problem entry in database from yml file. Parameters ---------- path: str or path-like Path to yml file """ with open(path, "r") as f: obj = yaml.load(f) self.create_problem(**obj) def create_problem(self, name="", problem_type="", problem_type_details={}, data_dir_train="", data_dir_test="", files=[], table_names=[], entities_table_name="", entities_featurized_table_name="", target_table_name=""): """Creates new problem entry in database. Parameters ---------- name : str problem_type : str Classification or regression problem_type_details : dict Dict with additional details about problem. For example, the dict may be {"classification_type" : "multiclass"}. data_dir_train : str Absolute path of containing directory of data files for training. data_dir_test : str Absolute path of containing directory of data files for testing files : list of str List of file paths relative to data_dir; files must be named identically within both data_dir directories. table_names : list of str List of table names, corresponding exactly to `files` entities_table_name : str Name of table that contains the entity variables. Must be found in `table_names`. entities_featurized_table_name : str Name of table that contains the pre-processed, featurized, entity variables. Must be found in `table_names`. target_table_name : str Name of table that contains the target variable (label). Must be found in table_names. Table must hold a single column with label values only. """ with self.__orm.session_scope() as session: try: problem = session.query(Problem).filter(Problem.name == name).one() self.__problemid = problem.id print("Problem {} already exists".format(name)) return except NoResultFound: pass # we will create it problem = Problem( name = name, problem_type = problem_type, problem_type_details = json.dumps(problem_type_details), data_dir_train = data_dir_train, data_dir_test = data_dir_test, files = json.dumps(files), table_names = json.dumps(table_names), entities_table_name = entities_table_name, entities_featurized_table_name = entities_featurized_table_name, target_table_name = target_table_name, ) session.add(problem) self.__problemid = problem.id print("Problem {} successfully created".format(name)) def get_problems(self): """Return a list of problems in the database.""" with self.__orm.session_scope() as session: try: problems = session.query(Problem.name).all() return [problem[0] for problem in problems] except NoResultFound: return [] def get_features(self, user_name=None): """Get a DataFrame with the details about all registered features.""" with self.__orm.session_scope() as session: results = self._get_features(session, user_name).all() feature_dicts = [] for feature, user_name in results: d = { "user" : user_name, "description" : feature.description, "md5" : feature.md5, "created_at" : feature.created_at, } feature_metrics = session.query(Metric.name, Metric.value).filter(Metric.feature_id == feature.id).all() # feature_metrics = feature.metrics for metric in feature_metrics: d[metric.name] = metric.value feature_dicts.append(d) if not feature_dicts: print("No features found") else: return pd.DataFrame(feature_dicts) def _get_features(self, session, user_name=None): """Return a query filtering a given user for the current problem. Parameters ---------- user_name : str, optional(default=None) If no user name provided, returns features for all users. """ #TODO pivot metrics tables query = session.query(Feature, User.name) #query = session.query(Feature, User.name, Metric) if user_name: query = query.filter(User.name == user_name) query = query.filter(Feature.problem_id == self.__problemid) return query
class Session(object): """Represents a user's session within FeatureHub. Includes commands for discovering, testing, and registering new features. """ def __init__(self, problem, database="featurehub"): self.__database = database self.__orm = ORMManager(database) self.__username = None with self.__orm.session_scope() as session: try: problem = session.query(Problem)\ .filter(Problem.name == problem)\ .one() self.__problem_id = problem.id except NoResultFound: raise ValueError("Invalid problem name: {}".format(problem)) except MultipleResultsFound: raise ValueError("Unexpected issue talking to database. " + TRY_AGAIN_LATER) # "log in" to the system self._login() # initialize evaluation client self.__evaluation_client = EvaluatorClient(self.__problem_id, self.__username, self.__orm) @property def __dataset(self): return self.__evaluation_client.dataset @property def __entities_featurized(self): return self.__evaluation_client.entities_featurized @property def __target(self): return self.__evaluation_client.target @staticmethod def _eval_server_post(route, data): url = "http://{}:{}/services/eval-server/{}".format( os.environ.get("EVAL_CONTAINER_NAME"), os.environ.get("EVAL_CONTAINER_PORT"), route) headers = { "Authorization": "token {}".format(os.environ.get("JUPYTERHUB_API_TOKEN")), } return requests.post(url=url, data=data, headers=headers) def _login(self): name = os.environ.get("USER") if not name: raise ValueError("Missing environment variable 'USER'. FeatureHub" " session not initialized.") with self.__orm.session_scope() as session: try: user = session.query(User)\ .filter(User.name == name)\ .one() self.__username = user.name except NoResultFound: data = {"database": self.__orm.database} response = Session._eval_server_post("create-user", data) if response.ok: self.__username = name else: raise ValueError("Couldn't log in to FeatureHub. " \ + TRY_AGAIN_LATER) except MultipleResultsFound as e: raise ValueError("Unexpected error logging in to FeatureHub. " \ + TRY_AGAIN_LATER) def get_sample_dataset(self): """Loads sample of problem training dataset. Returns the dataset a dict mapping table names to pandas DataFrames. Returns ------- dataset : dict (str => pd.DataFrame) A dict mapping table names to pandas DataFrames. target : pd.DataFrame A DataFrame that holds a single column: the target variable (label). Examples -------- >>> dataset = commands.get_sample_dataset() >>> dataset["users"] # -> returns DataFrame >>> dataset["stores"] # -> returns DataFrame """ self.__evaluation_client._load_dataset() # Return a *copy* of the dataset, ensuring we have enough memory. gc.collect() dataset = { table_name: self.__dataset[table_name].copy() for table_name in self.__dataset } target = self.__target.copy() # pylint: disable=no-member return (dataset, target) def get_entity_features(self): """Loads preprocessed entity-level features of problem training dataset. The entity-level features are the same length as the entity DataFrame and the target DataFrame. Returns ------- entity_features : pd.DataFrame or None """ self.__evaluation_client._load_dataset() if not pd.DataFrame(self.__entities_featurized).empty: entity_features = self.__entities_featurized.copy() else: entity_features = None return entity_features def discover_features(self, code_fragment=None): """Print features written by other users. A code fragment can be used to filter search results. For each feature, prints feature id, feature description, metrics, and source code. Parameters ---------- code_fragment : string, default=None Source code fragment to filter for. """ self._print_some_features(code_fragment, User.name != self.__username) def print_my_features(self, code_fragment=None): """Print features written by me. A code fragment can be used to filter search results. For each feature, prints feature id, feature description, metrics, and source code. Parameters ---------- code_fragment : string, default=None Source code fragment to filter for. """ self._print_some_features(code_fragment, User.name == self.__username) def _print_some_features(self, code_fragment, predicate): """Driver function for discover_features and print_my_features.""" with self.__orm.session_scope() as session: query = self._filter_features(session, code_fragment) # Filter only users that are not me query = query.join(Feature.user).filter(predicate) features = query.all() if features: for feature in features: # Join with metrics table query = session.query(Metric.name, Metric.value)\ .filter(Metric.feature_id == feature.id) metrics = query.all() metric_list = [(metric.name, metric.value) for metric in metrics] self._print_one_feature(feature.description, feature.id, feature.code, metric_list) else: print("No features found.") def evaluate(self, feature): """Evaluate feature on training dataset and return key performance metrics. Runs the feature in an isolated environment to extract the feature values. Validates the feature values. Then, builds a model on that one feature and computes key cross-validated metrics. Prints results and returns a dictionary with (metric => value) entries. If the feature is invalid, prints reason and returns empty dictionary. Parameters ---------- feature : function Feature to evaluate """ if self.__evaluation_client.check_if_registered(feature, verbose=True): return return self.__evaluation_client.evaluate(feature) def submit(self, feature, description=""): """Submit feature to server for evaluation on test data. If successful, registers feature in feature database and returns key performance metrics. Runs the feature in an isolated environment to extract the feature values. Validates the feature values. Then, builds a model on that one feature, performs cross validation, and returns key performance metrics. Parameters ---------- feature : function Feature to evaluate description : str Feature description. If left empty, will prompt for user imput. """ if not description: description = self._prompt_description() self.__evaluation_client.submit(feature, description) def _filter_features(self, session, code_fragment): """Return query that filters this problem and given code fragment. Return a query object that filters features written for the appropriate problem by code snippets. This query object can be added to by the caller. """ filter_ = (Feature.problem_id == self.__problem_id, ) if code_fragment: filter_ = filter_ + (Feature.code.contains(code_fragment), ) return session.query(Feature).filter(*filter_) def _prompt_description(self): """Prompt user for description of feature""" print( "First, enter feature description. Your feature description " "should be clear, concise, and meaningful to non-data scientists." " (If your feature fails to register, this description will be " "discarded.)") description = input("Enter description: ") print("") return description @staticmethod def _print_one_feature(feature_description, feature_id, feature_code, metric_list): """Print one feature in user-readable format. Parameters ---------- feature_description : str feature_id : int feature_code : str metric_list : MetricList Examples -------- >>> Session._print_one_feature("Age", 1, "def age(dataset): pass\n", metric_list_) ------------------- Feature id: 1 Feature description: Age Feature code: def age(dataset): pass Feature metrics: Accuracy: 0.5 ROC AUC: 0.35 """ result = "------------------\n" + \ "Feature id: {}\n".format(feature_id) + \ "Feature description: {}\n".format(feature_description) result += "\n" + \ "Feature code:\n" indent = " " tmp = [] for line in feature_code.split("\n"): tmp.append(indent + line) result += "\n".join(tmp) result += "\n" + \ "Feature metrics:\n" for metric_name, metric_value in metric_list: result += " {}: {}\n".format(metric_name, metric_value) print(result)
class Commands(object): """Admin interface for the database. Create the schema, add or remove problems, and view problems, users, and features. """ def __init__(self, database="featurehub"): """Create the ORMManager and connect to DB. Parameters ---------- database : str, optional (default="featurehub") Name of database within DBMS. """ self.__orm = ORMManager(database) if not database_exists(self.__orm.engine.url): print("Database {} does not exist.".format(database), file=sys.stderr) print("You might want to create it by calling set_up method", file=sys.stderr) def set_up(self, drop=False): """Create a new DB and create the initial scheme. If the database exists and drop=True, the existing database is dropped and recreated. Regardless, any tables defined by the schema that do not exist are created. Parameters ---------- drop : bool, optional (default=False) Drop database if it already exists. """ # todo extract database name from engine url and report for brevity engine = self.__orm.engine if database_exists(engine.url): print("Database {} already exists.".format(engine.url)) if drop: print("Dropping old database {}".format(engine.url)) drop_database(engine.url) with possibly_talking_action("Re-creating database..."): create_database(engine.url) else: with possibly_talking_action("Creating database..."): create_database(engine.url) with possibly_talking_action("Creating tables..."): Base.metadata.create_all(engine) print("Database {} created successfully".format(engine.url)) def bulk_create_problem_yml(self, path): """Create new problem entries in database from yml document stream. Can create a yml file with individual problems delimited into documents using the `---` ... `---` document stream syntax. Parameters ---------- path: str or path-like Path to yml file """ with open(path, "r") as f: obj_all = yaml.load_all(f) for obj in obj_all: self.create_problem(**obj) def create_problem_yml(self, path): """Create new problem entry in database from yml file. Parameters ---------- path: str or path-like Path to yml file """ with open(path, "r") as f: obj = yaml.load(f) self.create_problem(**obj) def create_problem(self, name="", problem_type="", problem_type_details={}, data_dir_train="", data_dir_test="", files=[], table_names=[], entities_table_name="", entities_featurized_table_name="", target_table_name=""): """Creates new problem entry in database. Parameters ---------- name : str problem_type : str Classification or regression problem_type_details : dict Dict with additional details about problem. For example, the dict may be {"classification_type" : "multiclass"}. data_dir_train : str Absolute path of containing directory of data files for training. data_dir_test : str Absolute path of containing directory of data files for testing files : list of str List of file paths relative to data_dir; files must be named identically within both data_dir directories. table_names : list of str List of table names, corresponding exactly to `files` entities_table_name : str Name of table that contains the entity variables. Must be found in `table_names`. entities_featurized_table_name : str Name of table that contains the pre-processed, featurized, entity variables. Must be found in `table_names`. target_table_name : str Name of table that contains the target variable (label). Must be found in table_names. Table must hold a single column with label values only. """ with self.__orm.session_scope() as session: try: problem = session.query(Problem).filter( Problem.name == name).one() print("Problem {} already exists".format(name)) return except NoResultFound: pass # we will create it problem = Problem( name=name, problem_type=problem_type, problem_type_details=json.dumps(problem_type_details), data_dir_train=data_dir_train, data_dir_test=data_dir_test, files=json.dumps(files), table_names=json.dumps(table_names), entities_table_name=entities_table_name, entities_featurized_table_name=entities_featurized_table_name, target_table_name=target_table_name, ) session.add(problem) print("Problem {} successfully created".format(name)) def get_problems(self): """Return a list of problems in the database.""" with self.__orm.session_scope() as session: try: problems = session.query(Problem.name).all() return [problem[0] for problem in problems] except NoResultFound: return [] def get_features(self, problem_name=None, user_name=None): """Get a DataFrame with the details about all registered features.""" with self.__orm.session_scope() as session: results = self._get_features(session, problem_name, user_name).all() feature_dicts = [] for feature, user_name in results: d = { "user": user_name, "description": feature.description, "md5": feature.md5, "created_at": feature.created_at, } feature_metrics = session.query( Metric.name, Metric.value).filter( Metric.feature_id == feature.id).all() # feature_metrics = feature.metrics for metric in feature_metrics: d[metric.name] = metric.value feature_dicts.append(d) if not feature_dicts: print("No features found") else: return pd.DataFrame(feature_dicts) def _get_features(self, session, problem_name="", user_name=""): """Return a query filtering a given user for the current problem. Parameters ---------- problem_name : str, optional If no problem name provided, returns features for all problems. user_name : str, optional If no user name provided, returns features for all users. """ #TODO pivot metrics tables query = session.query(Feature, User.name) #query = session.query(Feature, User.name, Metric) if user_name: query = query.filter(User.name == user_name) if problem_name: query = query.filter(Feature.problem.name == problem_name) return query def load_dataset(self, problem_name="", split="train"): """Load dataset for given problem with given split. Parameters ---------- problem_name : str, optional If no problem name provided, returns dataset for *first problem in database*. split : str Valid options include "train", "test", "both" (concatenated) """ orm = self.__orm username = "******" # should be unused (unless submit new feature to db) with orm.session_scope() as session: if not problem_name: problem_name = session.query(Problem.name)\ .filter(Problem.name != "demo").scalar() problem_id = session.query(Problem.id)\ .filter(Problem.name == problem_name).scalar() data_dir = os.path.join("/data", split) dataset, entities_featurized, target = load_dataset_from_dir( session, data_dir, problem_name) suffix = "_" + split return problem_name, dataset, entities_featurized, target def _extract_everything(self, suffix): with self.__orm.session_scope() as session: build_and_save_all_features(self, session, suffix) extract_and_save_all_tables(session, suffix) def _get_final_model_X_Y(self, problem_name, split, suffix): feature_matrix = load_feature_matrix(problem_name, split, suffix) _, _, entities_featurized, target = self.load_dataset( problem_name=problem_name, split=split) X = pd.concat([entities_featurized, feature_matrix], axis=1) Y = target return X, Y def _train_model(self, problem_name, split, suffix, **kwargs): with self.__orm.session_scope() as session: result = session.query(Problem)\ .filter(Problem.name == problem_name).one() problem_type = result.problem_type X_train, Y_train = self._get_final_model_X_Y(problem_name, split, suffix) automl = AutoModel(problem_type, **kwargs) # TODO increase time automl.fit(X_train, Y_train, dataset_name=problem_name) absname = prepare_automl_file_name(problem_name, split, suffix) automl.dump(absname) return automl, X_train, Y_train def _do_final_model(self, problem_name, suffix, split_train="train", split_test="test", **kwargs): """ """ automl, X_train, Y_train = self._train_model(problem_name, split_train, suffix, **kwargs) X_test, Y_test = self._get_final_model_X_Y(problem_name, split_test, suffix) if automl._is_classification(): Y_test_pred = automl.predict_proba(X_test) else: Y_test_pred = automl.predict(X_test) return automl, X_train, Y_train, X_test, Y_test, Y_test_pred