def search_base_learner(id):
    """Creates a set of base learners from base learner origin using grid search
    and queues them up.

    The request body supplies user source code defining either a ``param_grid``
    (method ``'grid'``) or ``param_distributions`` plus ``n_iter`` (method
    ``'random'``). One base learner is queued per parameter combination that
    does not already exist for this origin.

    Args:
        id: Primary key of the ``models.BaseLearnerOrigin`` to search over.
            (Name shadows the builtin ``id`` but is kept — it is part of the
            route's public interface.)

    Returns:
        flask.Response: JSON array of the serialized base learners queued.

    Raises:
        exceptions.UserError: If the method is not 'grid'/'random', the origin
            does not exist (404), or the origin is not finalized.
    """
    path = functions.get_path_from_query_string(request)
    req_body = request.get_json()

    # Build the hyperparameter iterator from the user-submitted source code.
    if req_body['method'] == 'grid':
        param_grid = functions.import_object_from_string_code(
            req_body['source'], 'param_grid')
        iterator = ParameterGrid(param_grid)
    elif req_body['method'] == 'random':
        param_distributions = functions.import_object_from_string_code(
            req_body['source'], 'param_distributions')
        iterator = ParameterSampler(param_distributions,
                                    n_iter=req_body['n_iter'])
    else:
        raise exceptions.UserError('{} not a valid search method'.format(
            req_body['method']))

    with functions.DBContextManager(path) as session:
        base_learner_origin = session.query(
            models.BaseLearnerOrigin).filter_by(id=id).first()
        if base_learner_origin is None:
            raise exceptions.UserError(
                'Base learner origin {} not found'.format(id), 404)

        if not base_learner_origin.final:
            raise exceptions.UserError(
                'Base learner origin {} is not final'.format(id))

        learners = []
        for params in iterator:
            est = base_learner_origin.return_estimator()
            try:
                est.set_params(**params)
            except Exception as e:
                # Best effort: an invalid parameter combination for this
                # estimator skips just that combination, not the whole search.
                print(repr(e))
                continue

            hyperparameters = functions.make_serializable(est.get_params())

            base_learners = session.query(models.BaseLearner).\
                filter_by(base_learner_origin_id=id,
                          hyperparameters=hyperparameters).all()
            if base_learners:  # already exists
                continue

            base_learner = models.BaseLearner(hyperparameters,
                                              'queued',
                                              base_learner_origin)

            session.add(base_learner)
            session.commit()  # commit first so base_learner.id is assigned
            with Connection(get_redis_connection()):
                rqtasks.generate_meta_features.delay(path, base_learner.id)
            learners.append(base_learner)

        # BUG FIX: `map(...)` returns a lazy iterator on Python 3, which
        # jsonify cannot serialize; materialize the list explicitly.
        return jsonify([learner.serialize for learner in learners])
def create_base_learner(id):
    """This creates a single base learner from a base learner origin and queues it up"""
    project_path = functions.get_path_from_query_string(request)

    with functions.DBContextManager(project_path) as session:
        # The origin must exist and be finalized before learners can be made.
        origin = session.query(
            models.BaseLearnerOrigin).filter_by(id=id).first()
        if origin is None:
            raise exceptions.UserError(
                'Base learner origin {} not found'.format(id), 404)

        if not origin.final:
            raise exceptions.UserError(
                'Base learner origin {} is not final'.format(id))

        payload = request.get_json()

        # Retrieve full hyperparameters: apply the user-supplied params on
        # top of the estimator defaults, then serialize the complete set.
        estimator = origin.return_estimator()
        user_params = functions.import_object_from_string_code(
            payload['source'], 'params')
        estimator.set_params(**user_params)
        full_params = functions.make_serializable(estimator.get_params())

        duplicates = session.query(models.BaseLearner).\
            filter_by(base_learner_origin_id=id,
                      hyperparameters=full_params).all()
        if duplicates:
            raise exceptions.UserError(
                'Base learner exists with given hyperparameters')

        new_learner = models.BaseLearner(full_params, 'queued', origin)

        # Keep a history of the source snippets used for single creations.
        if 'single_searches' not in origin.description:
            origin.description['single_searches'] = []
        origin.description['single_searches'] += [payload['source']]

        session.add(new_learner)
        session.add(origin)
        session.commit()

        with Connection(get_redis_connection()):
            rqtasks.generate_meta_features.delay(project_path, new_learner.id)

        return jsonify(new_learner.serialize)
def func_to_optimize(**params):
    """Objective function: score one hyperparameter combination of the base
    learner and return the metric value to maximize.

    NOTE(review): this is a nested function — it relies on closure variables
    from the enclosing scope not visible here (``base_learner_origin``,
    ``default_params``, ``integers``, ``invert_metric``,
    ``metric_to_optimize``, ``session``, ``path``) — confirm against caller.
    Returns the (possibly negated) ``metric_to_optimize`` score as a float.
    """
    base_estimator = base_learner_origin.return_estimator()
    base_estimator.set_params(**default_params)
    # For integer hyperparameters, make sure they are rounded off
    # (the optimizer presumably proposes continuous values — TODO confirm).
    params = dict((key, val) if key not in integers else (key, int(val))
                  for key, val in iteritems(params))
    base_estimator.set_params(**params)
    hyperparameters = functions.make_serializable(base_estimator.get_params())

    # Look if base learner already exists
    base_learner = session.query(models.BaseLearner).\
        filter_by(base_learner_origin_id=base_learner_origin.id,
                  hyperparameters=hyperparameters).first()

    # When True, compute the score but do not persist meta-features/status.
    calculate_only = False

    # If base learner exists and has finished, just return its result
    if base_learner and base_learner.job_status == 'finished':
        if invert_metric:
            return -base_learner.individual_score[metric_to_optimize]
        else:
            return base_learner.individual_score[metric_to_optimize]

    # else if base learner exists but is unfinished, just calculate the result without storing
    elif base_learner and base_learner.job_status != 'finished':
        calculate_only = True

    # else if base learner does not exist, create it
    else:
        base_learner = models.BaseLearner(hyperparameters,
                                          'started',
                                          base_learner_origin)
        # Tie the DB row to the currently running RQ job.
        base_learner.job_id = get_current_job().id
        session.add(base_learner)
        session.commit()

    try:
        est = base_learner.return_estimator()
        extraction = session.query(models.Extraction).first()
        X, y = extraction.return_train_dataset()
        # User source code defines how the train data is split into folds.
        return_splits_iterable = functions.import_object_from_string_code(
            extraction.meta_feature_generation['source'],
            'return_splits_iterable'
        )

        # Out-of-fold predictions: fit on each train split, predict on the
        # held-out split, and stitch the predictions back together.
        meta_features_list = []
        trues_list = []
        for train_index, test_index in return_splits_iterable(X, y):
            X_train, X_test = X[train_index], X[test_index]
            y_train, y_test = y[train_index], y[test_index]
            est = est.fit(X_train, y_train)
            meta_features_list.append(
                # meta_feature_generator names the estimator method to call,
                # e.g. a predict-like method (string attribute lookup).
                getattr(est, base_learner.base_learner_origin.
                        meta_feature_generator)(X_test)
            )
            trues_list.append(y_test)
        meta_features = np.concatenate(meta_features_list, axis=0)
        y_true = np.concatenate(trues_list)

        # Evaluate every user-defined metric on the out-of-fold predictions.
        for key in base_learner.base_learner_origin.metric_generators:
            metric_generator = functions.import_object_from_string_code(
                base_learner.base_learner_origin.metric_generators[key],
                'metric_generator'
            )
            base_learner.individual_score[key] = metric_generator(y_true, meta_features)

        # Only do this if you want to save things
        if not calculate_only:
            meta_features_path = base_learner.meta_features_path(path)
            if not os.path.exists(os.path.dirname(meta_features_path)):
                os.makedirs(os.path.dirname(meta_features_path))
            np.save(meta_features_path, meta_features, allow_pickle=False)
            base_learner.job_status = 'finished'
            base_learner.meta_features_exists = True
            session.add(base_learner)
            session.commit()

        # Negate when the optimizer maximizes but the metric should be
        # minimized (e.g. a loss).
        if invert_metric:
            return -base_learner.individual_score[metric_to_optimize]
        else:
            return base_learner.individual_score[metric_to_optimize]

    # NOTE(review): bare except — also catches KeyboardInterrupt/SystemExit;
    # the error is recorded and re-raised, so nothing is swallowed, but
    # consider `except Exception:` if interrupts should not mark 'errored'.
    except:
        session.rollback()
        base_learner.job_status = 'errored'
        base_learner.description['error_type'] = repr(sys.exc_info()[0])
        base_learner.description['error_value'] = repr(sys.exc_info()[1])
        base_learner.description['error_traceback'] = \
            traceback.format_exception(*sys.exc_info())
        session.add(base_learner)
        session.commit()
        raise