def specific_base_learner_origin(id):
    """View, modify, or delete a single base learner origin by id.

    GET returns the serialized origin, PATCH updates a whitelisted set of
    attributes (only while not final), DELETE removes it and its artifacts.
    """
    path = functions.get_path_from_query_string(request)
    with functions.DBContextManager(path) as session:
        origin = session.query(models.BaseLearnerOrigin).filter_by(id=id).first()
        if origin is None:
            raise exceptions.UserError('Base learner origin {} not found'.format(id), 404)

        if request.method == 'GET':
            return jsonify(origin.serialize)

        if request.method == 'PATCH':
            # Finalized origins are frozen; reject any modification.
            if origin.final:
                raise exceptions.UserError('Cannot modify a final base learner origin')
            req_body = request.get_json()
            # Only these attributes may be changed through the API.
            for attr in ('meta_feature_generator', 'name', 'source', 'metric_generators'):
                if attr in req_body:
                    setattr(origin, attr, req_body[attr])
            session.add(origin)
            session.commit()
            return jsonify(origin.serialize)

        if request.method == 'DELETE':
            # Remove any on-disk artifacts before deleting the DB row.
            origin.cleanup(path)
            session.delete(origin)
            session.commit()
            return jsonify(message='Deleted base learner origin')
def export_stacked_ensemble(id):
    """Export a stacked ensemble to disk as a package or a single .py file.

    POST body selects the export ``type`` ('package' or 'file') and the
    output ``name``, created inside the notebook path.
    """
    path = functions.get_path_from_query_string(request)
    with functions.DBContextManager(path) as session:
        ensemble = session.query(models.StackedEnsemble).filter_by(id=id).first()
        if ensemble is None:
            raise exceptions.UserError('Stacked ensemble {} not found'.format(id), 404)
        extraction = session.query(models.Extraction).first()

        if request.method == 'POST':
            req_body = request.get_json()
            export_type = req_body['type']
            if export_type == 'package':
                ensemble.export_as_package(
                    os.path.join(path, req_body['name']),
                    extraction.meta_feature_generation['source'])
            elif export_type == 'file':
                # Ensure the exported module carries a .py extension.
                if not req_body['name'].endswith('.py'):
                    req_body['name'] += '.py'
                ensemble.export_as_file(
                    os.path.join(path, req_body['name']),
                    extraction.meta_feature_generation['source'])
            return jsonify(
                message='Stacked ensemble successfully '
                        'exported as {} in {}'.format(req_body['name'], path))
def export_stacked_ensemble_as_base_learner_origin(id):
    """Convert a stacked ensemble into a brand-new base learner origin.

    The ensemble is rendered to source code and stored as an origin named
    'Xcessiv Ensemble', inheriting the secondary learner's meta feature
    generator.
    """
    path = functions.get_path_from_query_string(request)
    with functions.DBContextManager(path) as session:
        ensemble = session.query(models.StackedEnsemble).filter_by(id=id).first()
        if ensemble is None:
            raise exceptions.UserError('Stacked ensemble {} not found'.format(id), 404)
        extraction = session.query(models.Extraction).first()

        if request.method == 'POST':
            generated_source = ensemble.export_as_code(
                extraction.meta_feature_generation['source'])
            new_origin = models.BaseLearnerOrigin(
                source=generated_source,
                name='Xcessiv Ensemble',
                meta_feature_generator=ensemble.base_learner_origin.meta_feature_generator,
            )
            session.add(new_origin)
            session.commit()
            return jsonify(new_origin.serialize)
def verify_base_learner_origin(id):
    """Verify a (non-final) base learner origin against a chosen dataset.

    Runs the estimator through ``functions.verify_estimator_class`` and
    stores the resulting validation results (keyed by dataset name) and
    the extracted hyperparameters on the origin.
    """
    path = functions.get_path_from_query_string(request)
    with functions.DBContextManager(path) as session:
        origin = session.query(models.BaseLearnerOrigin).filter_by(id=id).first()
        if origin is None:
            raise exceptions.UserError('Base learner origin {} not found'.format(id), 404)

        if request.method == 'POST':
            req_body = request.get_json()
            if origin.final:
                raise exceptions.UserError(
                    'Base learner origin {} is already final'.format(id))
            estimator = origin.return_estimator()
            validation_results, hyperparameters = functions.verify_estimator_class(
                estimator,
                origin.meta_feature_generator,
                origin.metric_generators,
                req_body['dataset'])
            # Results are stored keyed by the dataset they were computed on.
            origin.validation_results = {req_body['dataset']: validation_results}
            origin.hyperparameters = hyperparameters
            session.add(origin)
            session.commit()
            return jsonify(origin.serialize)
def start_automated_run(id):
    """Start an automated run using the passed-in source code for configuration."""
    path = functions.get_path_from_query_string(request)
    req_body = request.get_json()

    with functions.DBContextManager(path) as session:
        origin = session.query(models.BaseLearnerOrigin).filter_by(id=id).first()
        if origin is None:
            raise exceptions.UserError('Base learner origin {} not found'.format(id), 404)
        if not origin.final:
            raise exceptions.UserError('Base learner origin {} is not final'.format(id))

        # Importing the submitted source as a module surfaces syntax errors
        # before anything is persisted; the module itself is discarded.
        module = functions.import_string_code_as_module(req_body['source'])
        del module

        # NOTE(review): another endpoint in this module constructs AutomatedRun
        # with an extra category argument — confirm this three-argument call
        # matches the current model constructor.
        automated_run = models.AutomatedRun(req_body['source'], 'queued', origin)
        session.add(automated_run)
        session.commit()

        with Connection(get_redis_connection()):
            rqtasks.start_automated_run.delay(path, automated_run.id)

        return jsonify(automated_run.serialize)
def confirm_base_learner_origin(id):
    """Re-verify an origin and mark it as final.

    Requires a prior verification run; re-runs verification and freezes the
    origin so it can no longer be modified.
    """
    path = functions.get_path_from_query_string(request)
    with functions.DBContextManager(path) as session:
        origin = session.query(models.BaseLearnerOrigin).filter_by(id=id).first()
        if origin is None:
            raise exceptions.UserError('Base learner origin {} not found'.format(id), 404)

        if request.method == 'GET':
            if origin.final:
                raise exceptions.UserError(
                    'Base learner origin {} is already final'.format(id))
            if not origin.validation_results:
                raise exceptions.UserError(
                    'Base learner origin {} has not yet been '
                    'verified on a dataset'.format(id))

            estimator = origin.return_estimator()
            # NOTE(review): this reads/writes the literal key 'dataset', while
            # the verify endpoint stores results keyed by the dataset's name —
            # confirm which schema validation_results is expected to follow.
            validation_results, hyperparameters = functions.verify_estimator_class(
                estimator,
                origin.meta_feature_generator,
                origin.metric_generators,
                origin.validation_results['dataset'])
            origin.validation_results = {
                'dataset': origin.validation_results['dataset'],
                'metrics': validation_results,
            }
            origin.hyperparameters = hyperparameters
            origin.final = True
            session.add(origin)
            session.commit()
            return jsonify(origin.serialize)
def get_automated_runs():
    """Return all automated runs."""
    # NOTE(review): a later definition in this module reuses this exact name
    # and will shadow this one at import time — confirm which is intended.
    path = functions.get_path_from_query_string(request)
    if request.method == 'GET':
        with functions.DBContextManager(path) as session:
            runs = session.query(models.AutomatedRun).all()
            return jsonify([run.serialize for run in runs])
def search_base_learner(id):
    """Create a set of base learners from a base learner origin using grid or
    random search and queue them up for meta-feature generation.

    The POST body must contain ``method`` ('grid' or 'random'), ``source``
    (code defining ``param_grid`` or ``param_distributions``), and for random
    search an ``n_iter`` count. Duplicate hyperparameter sets and parameters
    rejected by the estimator are skipped silently.
    """
    path = functions.get_path_from_query_string(request)
    req_body = request.get_json()

    if req_body['method'] == 'grid':
        param_grid = functions.import_object_from_string_code(
            req_body['source'], 'param_grid')
        iterator = ParameterGrid(param_grid)
    elif req_body['method'] == 'random':
        param_distributions = functions.import_object_from_string_code(
            req_body['source'], 'param_distributions')
        iterator = ParameterSampler(param_distributions, n_iter=req_body['n_iter'])
    else:
        raise exceptions.UserError('{} not a valid search method'.format(
            req_body['method']))

    with functions.DBContextManager(path) as session:
        base_learner_origin = session.query(
            models.BaseLearnerOrigin).filter_by(id=id).first()
        if base_learner_origin is None:
            raise exceptions.UserError(
                'Base learner origin {} not found'.format(id), 404)
        if not base_learner_origin.final:
            raise exceptions.UserError(
                'Base learner origin {} is not final'.format(id))

        learners = []
        for params in iterator:
            est = base_learner_origin.return_estimator()
            try:
                est.set_params(**params)
            except Exception as e:
                # Invalid parameter combination for this estimator; skip it.
                print(repr(e))
                continue

            hyperparameters = functions.make_serializable(est.get_params())
            existing = session.query(models.BaseLearner).\
                filter_by(base_learner_origin_id=id,
                          hyperparameters=hyperparameters).all()
            if existing:  # already exists
                continue

            base_learner = models.BaseLearner(hyperparameters,
                                              'queued',
                                              base_learner_origin)
            session.add(base_learner)
            session.commit()
            with Connection(get_redis_connection()):
                rqtasks.generate_meta_features.delay(path, base_learner.id)
            learners.append(base_learner)

        # BUG FIX: jsonify cannot serialize a lazy map object on Python 3;
        # materialize the list, matching every other list endpoint here.
        return jsonify(list(map(lambda x: x.serialize, learners)))
def verify_extraction_test_dataset():
    """Materialize the configured test dataset and return its statistics."""
    path = functions.get_path_from_query_string(request)
    with functions.DBContextManager(path) as session:
        extraction = session.query(models.Extraction).first()
    X, y = extraction.return_test_dataset()
    return jsonify(functions.verify_dataset(X, y))
def create_new_stacked_ensemble():
    """List all stacked ensembles (GET) or create and queue a new one (POST).

    POST validates that every referenced base learner exists and has
    finished, resolves the secondary learner's full hyperparameters from
    the submitted source, rejects exact duplicates, then enqueues
    evaluation of the new ensemble.
    """
    path = functions.get_path_from_query_string(request)
    req_body = request.get_json()

    with functions.DBContextManager(path) as session:
        if request.method == 'GET':
            ensembles = session.query(models.StackedEnsemble).all()
            return jsonify([se.serialize for se in ensembles])

        if request.method == 'POST':
            base_learners = session.query(models.BaseLearner).filter(
                models.BaseLearner.id.in_(req_body['base_learner_ids'])).all()
            if len(base_learners) != len(req_body['base_learner_ids']):
                raise exceptions.UserError('Not all base learners found')
            for learner in base_learners:
                if learner.job_status != 'finished':
                    raise exceptions.UserError('Not all base learners have finished')

            origin = session.query(models.BaseLearnerOrigin).filter_by(
                id=req_body['base_learner_origin_id']).first()
            if origin is None:
                raise exceptions.UserError(
                    'Base learner origin {} not '
                    'found'.format(req_body['base_learner_origin_id']), 404)

            # Resolve the full hyperparameter dict by applying the submitted
            # params on a fresh estimator and reading back get_params().
            est = origin.return_estimator()
            params = functions.import_object_from_string_code(
                req_body['secondary_learner_hyperparameters_source'], 'params')
            est.set_params(**params)
            hyperparameters = functions.make_serializable(est.get_params())

            duplicates = session.query(models.StackedEnsemble).filter_by(
                base_learner_origin_id=req_body['base_learner_origin_id'],
                secondary_learner_hyperparameters=hyperparameters,
                base_learner_ids=sorted([bl.id for bl in base_learners])).all()
            if duplicates:
                raise exceptions.UserError('Stacked ensemble exists')

            stacked_ensemble = models.StackedEnsemble(
                secondary_learner_hyperparameters=hyperparameters,
                base_learners=base_learners,
                base_learner_origin=origin,
                job_status='queued')
            session.add(stacked_ensemble)
            session.commit()

            with Connection(get_redis_connection()):
                rqtasks.evaluate_stacked_ensemble.delay(path, stacked_ensemble.id)

            return jsonify(stacked_ensemble.serialize)
def verify_extraction_meta_feature_generation():
    """Materialize the holdout dataset used for meta-feature generation.

    Rejects the request when meta-features are generated via
    cross-validation, since no separate holdout set exists in that mode.
    """
    path = functions.get_path_from_query_string(request)
    with functions.DBContextManager(path) as session:
        extraction = session.query(models.Extraction).first()
    if extraction.meta_feature_generation['method'] == 'cv':
        raise exceptions.UserError('Xcessiv will use cross-validation to'
                                   ' generate meta-features')
    X_holdout, y_holdout = extraction.return_holdout_dataset()
    return jsonify(functions.verify_dataset(X_holdout, y_holdout))
def verify_full_extraction():
    """This is an experimental endpoint to simultaneously verify data
    statistics and extraction for training, test, and holdout datasets.
    With this, the other three verification methods will no longer be
    necessary.
    """
    path = functions.get_path_from_query_string(request)
    if request.method == 'POST':
        # Recompute statistics synchronously, then read them back.
        rqtasks.extraction_data_statistics(path)

    with functions.DBContextManager(path) as session:
        extraction = session.query(models.Extraction).first()
    return jsonify(extraction.data_statistics)
def get_automated_runs():
    """Return all automated runs (GET) or create and queue a new one (POST).

    POST bodies specify a ``category`` ('bayes', 'greedy_ensemble_search',
    or 'tpot') and ``source`` configuration code; the first two categories
    additionally require a finalized base learner origin.
    """
    path = functions.get_path_from_query_string(request)

    if request.method == 'GET':
        with functions.DBContextManager(path) as session:
            runs = session.query(models.AutomatedRun).all()
            return jsonify([run.serialize for run in runs])

    if request.method == 'POST':
        req_body = request.get_json()
        with functions.DBContextManager(path) as session:
            origin = None
            category = req_body['category']
            if category == 'bayes' or category == 'greedy_ensemble_search':
                # These categories optimize against a specific, finalized origin.
                origin = session.query(models.BaseLearnerOrigin).\
                    filter_by(id=req_body['base_learner_origin_id']).first()
                if origin is None:
                    raise exceptions.UserError(
                        'Base learner origin {} not found'.format(
                            req_body['base_learner_origin_id']), 404)
                if not origin.final:
                    raise exceptions.UserError(
                        'Base learner origin {} is not final'.format(
                            req_body['base_learner_origin_id']))
            elif category == 'tpot':
                pass
            else:
                raise exceptions.UserError('Automated run category'
                                           ' {} not recognized'.format(category))

            # Importing the source surfaces syntax errors before persisting.
            module = functions.import_string_code_as_module(req_body['source'])
            del module

            automated_run = models.AutomatedRun(req_body['source'],
                                                'queued',
                                                category,
                                                origin)
            session.add(automated_run)
            session.commit()

            with Connection(get_redis_connection()):
                rqtasks.start_automated_run.delay(path, automated_run.id)

            return jsonify(automated_run.serialize)
def get_base_learners():
    """Return all base learners (GET) or delete every one of them (DELETE)."""
    path = functions.get_path_from_query_string(request)
    with functions.DBContextManager(path) as session:
        base_learners = session.query(models.BaseLearner).all()

        if request.method == 'GET':
            return jsonify([bl.serialize for bl in base_learners])

        if request.method == 'DELETE':
            # Go crazy and delete everything
            for bl in base_learners:
                bl.delete_meta_features(path)
                session.delete(bl)
            session.commit()
            return jsonify(message='Deleted all base learners')
def specific_automated_run(id):
    """View (GET) or delete (DELETE) a single automated run by id."""
    path = functions.get_path_from_query_string(request)
    with functions.DBContextManager(path) as session:
        run = session.query(models.AutomatedRun).filter_by(id=id).first()
        if run is None:
            raise exceptions.UserError('Automated run {} not found'.format(id), 404)

        if request.method == 'GET':
            return jsonify(run.serialize)

        if request.method == 'DELETE':
            session.delete(run)
            session.commit()
            return jsonify(message='Deleted automated run')
def specific_stacked_ensemble(id):
    """View (GET) or delete (DELETE) a single stacked ensemble by id."""
    path = functions.get_path_from_query_string(request)
    with functions.DBContextManager(path) as session:
        ensemble = session.query(models.StackedEnsemble).filter_by(id=id).first()
        if ensemble is None:
            raise exceptions.UserError('Stacked ensemble {} not found'.format(id), 404)

        if request.method == 'GET':
            return jsonify(ensemble.serialize)

        if request.method == 'DELETE':
            session.delete(ensemble)
            session.commit()
            return jsonify(message='Deleted stacked ensemble')
def create_base_learner(id):
    """Create a single base learner from a base learner origin and queue it up.

    The POST body's ``source`` must define ``params``; the full
    hyperparameter set is resolved through the estimator and rejected if a
    learner with identical hyperparameters already exists. The submitted
    source is also appended to the origin's 'single_searches' history.
    """
    path = functions.get_path_from_query_string(request)
    with functions.DBContextManager(path) as session:
        origin = session.query(models.BaseLearnerOrigin).filter_by(id=id).first()
        if origin is None:
            raise exceptions.UserError('Base learner origin {} not found'.format(id), 404)
        if not origin.final:
            raise exceptions.UserError('Base learner origin {} is not final'.format(id))

        req_body = request.get_json()

        # Resolve the full hyperparameter dict from the submitted params.
        est = origin.return_estimator()
        submitted = functions.import_object_from_string_code(req_body['source'], 'params')
        est.set_params(**submitted)
        hyperparameters = functions.make_serializable(est.get_params())

        duplicates = session.query(models.BaseLearner).\
            filter_by(base_learner_origin_id=id,
                      hyperparameters=hyperparameters).all()
        if duplicates:
            raise exceptions.UserError('Base learner exists with given hyperparameters')

        base_learner = models.BaseLearner(hyperparameters, 'queued', origin)

        # Record the search source on the origin for later reference.
        if 'single_searches' not in origin.description:
            origin.description['single_searches'] = []
        origin.description['single_searches'] += [req_body['source']]

        session.add(base_learner)
        session.add(origin)
        session.commit()

        with Connection(get_redis_connection()):
            rqtasks.generate_meta_features.delay(path, base_learner.id)

        return jsonify(base_learner.serialize)
def specific_base_learner(id):
    """View (GET) or delete (DELETE) a single base learner by id."""
    path = functions.get_path_from_query_string(request)
    with functions.DBContextManager(path) as session:
        learner = session.query(models.BaseLearner).filter_by(id=id).first()
        if learner is None:
            raise exceptions.UserError('Base learner {} not found'.format(id), 404)

        if request.method == 'GET':
            return jsonify(learner.serialize)

        if request.method == 'DELETE':
            # Drop saved meta-feature files before removing the row.
            learner.cleanup(path)
            session.delete(learner)
            session.commit()
            return jsonify(message='Deleted base learner')
def base_learner_origins_view():
    """List all base learner origins (GET) or create a new one (POST)."""
    path = functions.get_path_from_query_string(request)

    if request.method == 'GET':
        with functions.DBContextManager(path) as session:
            origins = session.query(models.BaseLearnerOrigin).all()
            return jsonify([origin.serialize for origin in origins])

    if request.method == 'POST':  # Create new base learner origin
        req_body = request.get_json()
        # NOTE(review): the request body is expanded directly into the model
        # constructor — any unexpected key raises a TypeError; confirm the
        # constructor validates/whitelists its kwargs.
        new_origin = models.BaseLearnerOrigin(**req_body)
        with functions.DBContextManager(path) as session:
            session.add(new_origin)
            session.commit()
            return jsonify(new_origin.serialize)
def extraction_test_dataset():
    """View (GET) or partially update (PATCH) the test dataset configuration."""
    path = functions.get_path_from_query_string(request)

    if request.method == 'GET':
        with functions.DBContextManager(path) as session:
            extraction = session.query(models.Extraction).first()
            return jsonify(extraction.test_dataset)

    if request.method == 'PATCH':
        req_body = request.get_json()
        with functions.DBContextManager(path) as session:
            extraction = session.query(models.Extraction).first()
            # Merge the submitted keys into the stored configuration.
            for key, value in six.iteritems(req_body):
                extraction.test_dataset[key] = value
            session.add(extraction)
            session.commit()
            return jsonify(extraction.test_dataset)