def test_real():
    """Check sampling limits and transforms of ``Real`` for both priors.

    Yields ``(check_limits, value, low, high)`` tuples (generator-style test)
    for 50 seeded samples per prior, and asserts that:

    * with the default prior, ``transform`` and ``inverse_transform`` leave
      the samples unchanged;
    * with the ``"log-uniform"`` prior, ``transform`` equals ``np.log10`` of
      the samples and ``inverse_transform`` recovers them.
    """
    a = Real(1, 25)
    for i in range(50):
        yield (check_limits, a.rvs(random_state=i), 1, 25)
    random_values = a.rvs(random_state=0, n_samples=10)
    # NOTE: ``(10)`` is just the int 10 and would be broadcast against the
    # shape, so compare against a real 1-tuple to pin the number of axes.
    assert_array_equal(random_values.shape, (10,))
    assert_array_equal(a.transform(random_values), random_values)
    assert_array_equal(a.inverse_transform(random_values), random_values)

    log_uniform = Real(10**-5, 10**5, prior="log-uniform")
    for i in range(50):
        random_val = log_uniform.rvs(random_state=i)
        yield (check_limits, random_val, 10**-5, 10**5)
    random_values = log_uniform.rvs(random_state=0, n_samples=10)
    assert_array_equal(random_values.shape, (10,))
    transformed_vals = log_uniform.transform(random_values)
    assert_array_equal(transformed_vals, np.log10(random_values))
    assert_array_equal(
        log_uniform.inverse_transform(transformed_vals), random_values)
NODES = [numVars,numVars*10] PATTERN = ['static', 'dynamic'] BATCH_POW = [8,11] # used as 2 ^ BATCH_POW LRATE = [1e-5,1e-2] REGULATOR = ['none', 'dropout', 'normalization', 'both'] ACTIVATION = ['relu','softplus','elu'] ### Optimization parameters NCALLS = 50 NSTARTS = 30 space = [ Integer(HIDDEN[0], HIDDEN[1], name = "hidden_layers"), Integer(NODES[0], NODES[1], name = "initial_nodes"), Integer(BATCH_POW[0], BATCH_POW[1], name = "batch_power"), Real(LRATE[0], LRATE[1], "log-uniform", name = "learning_rate"), Categorical(PATTERN, name = "node_pattern"), Categorical(REGULATOR, name = "regulator"), Categorical(ACTIVATION, name = "activation_function") ] ###################################################### ###################################################### ###### ###### ###### M O R E M E T H O D S ###### ###### ###### ###################################################### ###################################################### @use_named_args(space) def objective(**X):
from gaussian_process import GaussianProcessSearch file_dir = str(pathlib.Path(__file__).resolve().parents[0]) def signal_handler(sig, frame): print('\nSIGINT signal received: killing instances...') for instance in instances: instance.kill() sys.exit(0) signal.signal(signal.SIGINT, signal_handler) search_space = [ Real(low=0.01, high=1., name='reproduction_reward'), Real(low=1e-7, high=1e-4, name='time_step_modifier'), Real(low=0., high=1e-5, name='pop_reward_modifier'), ] if __name__ == "__main__": num_instances = 4 # Paths and files gpro_input_file = None # Use None to start from zero env_dir = "envs/" env_path = os.path.join(env_dir, "optimization_test.x86_64") log_files_dir = os.path.join(file_dir, "logs/") output_files_dir = "out_files/" config_file = "config/chickens.yaml"
from skopt.utils import use_named_args from skopt import gp_minimize from skopt.plots import plot_evaluations, plot_objective, plot_convergence import matplotlib.pyplot as plt import numpy as np import pandas as pd from models import Model from run_model import make_model with open('opt_results.txt', 'w') as f: f.write('Hyper OPT Results') dim_batch_size = Categorical(categories=[4, 8, 12, 24, 32], name='batch_size') dim_lookback = Integer(low=5, high=20, prior='uniform', name='lookback') dim_learning_rate = Real(low=1e-7, high=1e-3, prior='uniform', name='lr') dim_lstm_units = Categorical(categories=[16, 32, 64, 128], name='lstm_units') dim_act1_f = Categorical( categories=['relu', 'tanh', 'elu', 'LeakyRelu', 'none'], name='lstm1_act') dim_act2_f = Categorical( categories=['relu', 'tanh', 'elu', 'LeakyRelu', 'none'], name='lstm2_act') default_values = [12, 10, 0.00001, 32, 'none', 'none'] dimensions = [ dim_batch_size, dim_lookback, dim_learning_rate, dim_lstm_units, dim_act1_f, dim_act2_f ] def objective_fn(**kwargs):
def test_real():
    """Check sampling limits, equality and transforms of ``Real``.

    Yields ``(check_limits, value, low, high)`` tuples (generator-style test)
    for 50 seeded samples per prior, and asserts that:

    * with the default prior, ``transform`` and ``inverse_transform`` leave
      the samples unchanged;
    * a ``"log-uniform"`` dimension does not compare equal to a
      default-prior dimension with the same bounds;
    * with the ``"log-uniform"`` prior, ``transform`` equals ``np.log10`` of
      the samples and ``inverse_transform`` recovers them.
    """
    a = Real(1, 25)
    for i in range(50):
        yield (check_limits, a.rvs(random_state=i), 1, 25)
    random_values = a.rvs(random_state=0, n_samples=10)
    # NOTE: ``(10)`` is just the int 10 and would be broadcast against the
    # shape, so compare against a real 1-tuple to pin the number of axes.
    assert_array_equal(random_values.shape, (10,))
    assert_array_equal(a.transform(random_values), random_values)
    assert_array_equal(a.inverse_transform(random_values), random_values)

    log_uniform = Real(10**-5, 10**5, prior="log-uniform")
    assert_not_equal(log_uniform, Real(10**-5, 10**5))
    for i in range(50):
        random_val = log_uniform.rvs(random_state=i)
        yield (check_limits, random_val, 10**-5, 10**5)
    random_values = log_uniform.rvs(random_state=0, n_samples=10)
    assert_array_equal(random_values.shape, (10,))
    transformed_vals = log_uniform.transform(random_values)
    assert_array_equal(transformed_vals, np.log10(random_values))
    assert_array_equal(
        log_uniform.inverse_transform(transformed_vals), random_values)
############################################################################### if __name__ == "__main__": ################################### # Select Optimization Options # ################################### #=== Number of Iterations ===# n_calls = 10 #=== Select Hyperparameters of Interest ===# hyperp_of_interest_dict = {} hyperp_of_interest_dict['num_hidden_layers_encoder'] = Integer(5, 10, name='num_hidden_layers_encoder') hyperp_of_interest_dict['num_hidden_nodes_encoder'] = Integer(100, 1000, name='num_hidden_nodes_encoder') hyperp_of_interest_dict['activation'] = Categorical(['relu', 'elu', 'sigmoid', 'tanh'], name='activation') hyperp_of_interest_dict['penalty_js'] = Real(0, 1, name='penalty_js') #hyperp_of_interest_dict['batch_size'] = Integer(100, 500, name='batch_size') ##################### # Initial Setup # ##################### #=== Generate skopt 'space' list ===# space = [] for key, val in hyperp_of_interest_dict.items(): space.append(val) #=== Hyperparameters ===# with open('../config_files/hyperparameters_vae_full.yaml') as f: hyperp = yaml.safe_load(f) hyperp = AttrDict(hyperp)
def sell_indicator_space() -> List[Dimension]:
    """Return the search dimensions used for the sell indicators.

    The list holds, in order, an integer ``sell-adx`` dimension over
    [30, 90] and a real ``sell-fisher`` dimension over [0, 1.0].
    """
    adx_dimension = Integer(30, 90, name='sell-adx')
    fisher_dimension = Real(0, 1.0, name='sell-fisher')
    return [adx_dimension, fisher_dimension]
( "post_process", OptionedPostProcessTransformer(dict_pipelines_post_process), ), ("resample", SMOTE()), ] ), "search_space": [ Categorical([True, False], name="undersampling_majority_class"), Integer(5, 6, name="max_k_undersampling"), Categorical(["minority", "all"], name="resample__sampling_strategy"), Categorical( ["option_1", "option_2", "option_3"], name="post_process__option" ), Integer(5, 15, name="model__max_depth"), Real(0.05, 0.31, prior="log-uniform", name="model__learning_rate"), Integer(1, 10, name="model__min_child_weight"), Real(0.8, 1, prior="log-uniform", name="model__subsample"), Real(0.13, 0.8, prior="log-uniform", name="model__colsample_bytree"), Real(0.1, 10, prior="log-uniform", name="model__scale_pos_weight"), Categorical(["binary:logistic"], name="model__objective"), ], }, "random_forest": { "model": RandomForestClassifier(), "pipeline_post_process": None, "search_space": [ Categorical([True, False], name="undersampling_majority_class"), Integer(0, 1, name="model__bootstrap"), Integer(10, 100, name="model__n_estimators"), Integer(2, 10, name="model__max_depth"),
activation='relu')) model.add(K.layers.Flatten()) model.add(K.layers.Dense(n_classes, activation='softmax')) optimizer = K.optimizers.Adam(lr=learning_rate) model.compile(loss='mean_absolute_error', optimizer=optimizer, metrics=['mae']) return model # Set up dimensions --> Edited to integers dim_filter1 = Integer(low=16, high=200, name='filter1', dtype=int) dim_filter2 = Integer(low=16, high=200, name='filter2', dtype=int) dim_filter3 = Integer(low=16, high=200, name='filter3', dtype=int) dim_learning_rate = Real(low=1e-4, high=1e-2, prior='log-uniform', name='learning_rate') dim_batch_size = Integer( low=10, high=64, name='batch_size', dtype=int) # NOTE: Must be int so it works for KerasRegressor copy dimensions = [ dim_filter1, dim_filter2, dim_filter3, dim_learning_rate, dim_batch_size ] print("Number of dimensions:", len(dimensions)) default_parameters = [64, 64, 64, 1e-4, 16] @use_named_args(dimensions=dimensions)
def test_searchcv_sklearn_compatibility():
    """
    Test whether the WeightedBayesSearchCV is compatible with base sklearn
    methods such as clone, set_params, get_params.
    """
    # ``return_X_y`` is keyword-only in current scikit-learn releases, so it
    # must not be passed positionally.
    X, y = load_iris(return_X_y=True)
    # Only the training split is used by this test.
    X_train, _, y_train, _ = train_test_split(
        X, y, train_size=0.75, random_state=0
    )

    # used to try different model classes
    pipe = Pipeline([
        ('model', SVC())
    ])

    # single categorical value of 'model' parameter sets the model class
    lin_search = {
        'model': Categorical([LinearSVC()]),
        'model__C': Real(1e-6, 1e+6, prior='log-uniform'),
    }

    dtc_search = {
        'model': Categorical([DecisionTreeClassifier()]),
        'model__max_depth': Integer(1, 32),
        'model__min_samples_split': Real(1e-3, 1.0, prior='log-uniform'),
    }

    svc_search = {
        'model': Categorical([SVC()]),
        'model__C': Real(1e-6, 1e+6, prior='log-uniform'),
        'model__gamma': Real(1e-6, 1e+1, prior='log-uniform'),
        'model__degree': Integer(1, 8),
        'model__kernel': Categorical(['linear', 'poly', 'rbf']),
    }

    opt = WeightedBayesSearchCV(
        pipe,
        [(lin_search, 1), svc_search],
        n_iter=2
    )

    # The clone is taken before ``set_params`` below, so it keeps the
    # original two search spaces.
    opt_clone = clone(opt)

    params, params_clone = opt.get_params(), opt_clone.get_params()
    assert params.keys() == params_clone.keys()

    for param, param_clone in zip(params.items(), params_clone.items()):
        assert param[0] == param_clone[0]
        assert isinstance(param[1], type(param_clone[1]))

    opt.set_params(search_spaces=[(dtc_search, 1)])

    opt.fit(X_train, y_train)
    opt_clone.fit(X_train, y_train)

    total_evaluations = len(opt.cv_results_['mean_test_score'])
    total_evaluations_clone = len(opt_clone.cv_results_['mean_test_score'])

    # test if expected number of subspaces is explored
    assert total_evaluations == 1
    assert total_evaluations_clone == 1 + 2
def get_dimensions():
    """Return the search space: one real ``sigma`` dimension in [0.01, 1.0]."""
    sigma = Real(low=0.01, high=1.0, name='sigma')
    return [sigma]
from sklearn.externals import joblib from sklearn.pipeline import Pipeline from sklearn.preprocessing import MinMaxScaler, StandardScaler from sklearn_pandas import DataFrameMapper from utility import HyperParameters, Runner from model import load_clean_data_frame, ordinal_data_mapper sample = None iterations = 24 hyper_parameters = HyperParameters( search_space={ 'xgb__n_estimators': Integer(100, 500), 'xgb__learning_rate': Real(0.1, 0.3), 'xgb__gamma': Real(0.0001, 100.0, prior='log-uniform'), 'xgb__max_depth': Integer(3, 7), 'xgb__colsample_bytree': Real(0.4, 0.8), 'xgb__colsample_bylevel': Real(0.4, 0.8), 'xgb__colsample_bynode': Real(0.4, 0.8) }) # Features were selected based on feature importance from experiments. data_mapper = DataFrameMapper([(['iucr'], [MinMaxScaler()]), (['location'], [MinMaxScaler()]), (['latitude'], [StandardScaler()]), (['hour'], [MinMaxScaler()]), (['longitude'], [StandardScaler()]), (['type'], [MinMaxScaler()]), (['month'], [MinMaxScaler()]),
def main():
    """Tune an ``XGBClassifier`` with ``BayesSearchCV`` and print its scores.

    Reads ``../train_dataset.csv`` and ``../test_dataset.csv``; in both files
    column 0 is used as the target and columns 2 onwards as the features.
    After every search iteration the best configuration found so far is
    printed, and at the end the best cross-validation score and the score on
    the test set are printed.
    """
    df_train = pd.read_csv('../train_dataset.csv')
    df_test = pd.read_csv('../test_dataset.csv')
    X_train, y_train = df_train.iloc[:, 2:].values, df_train.iloc[:, 0].values
    X_test, y_test = df_test.iloc[:, 2:].values, df_test.iloc[:, 0].values

    # log-uniform: understand as search over p = exp(x) by varying x
    opt = BayesSearchCV(
        estimator=xgb.XGBClassifier(),
        # ref: https://github.com/automl/auto-sklearn/blob/master/autosklearn/pipeline/components/classification/xgradient_boosting.py
        search_spaces={
            'learning_rate': Real(0.001, 1.0, 'log-uniform'),
            'min_child_weight': Integer(0, 20),
            'max_depth': Integer(0, 50),
            'max_delta_step': Integer(0, 20),
            'subsample': Real(0.01, 1.0, 'uniform'),
            'colsample_bytree': Real(0.01, 1.0, 'uniform'),
            'colsample_bylevel': Real(0.01, 1.0, 'uniform'),
            'reg_lambda': Real(1e-10, 1e-1, 'log-uniform'),
            'reg_alpha': Real(1e-10, 1e-1, 'log-uniform'),
            'gamma': Real(1e-9, 0.5, 'log-uniform'),
            'n_estimators': Integer(50, 512),
            'scale_pos_weight': Real(1e-6, 500, 'log-uniform'),
            'booster': ["gbtree", "dart"],
            'sample_type': ['uniform', 'weighted'],
            'normalize_type': ['tree', 'forest'],
            'rate_drop': Real(1e-10, 1 - (1e-10), 'uniform')
        },
        cv=StratifiedKFold(n_splits=10, shuffle=True),
        n_jobs=3,
        n_iter=100,
        verbose=0,
        refit=True,
        random_state=42)

    def status_print(_):
        """Status callback during Bayesian hyperparameter search."""
        # Get all the models tested so far in DataFrame format
        all_models = pd.DataFrame(opt.cv_results_)

        # Unwrap NumPy scalars so they can be JSON-encoded; values without
        # ``.item()`` (plain Python ints, bools, strings, floats) are used
        # as they are instead of raising AttributeError.
        best_params = {
            k: v.item() if hasattr(v, 'item') else v
            for k, v in opt.best_params_.items()
        }

        # Render each parameter as ``name=<JSON value>``.
        param_list = [
            '{}={}'.format(k, json.dumps(v)) for k, v in best_params.items()
        ]
        if hasattr(opt.estimator, 'verbose'):
            param_list.append('verbose=True')

        param = opt.estimator.__class__.__name__ + \
            '(' + ', '.join(param_list) + ')'

        # Get current parameters and the best parameters
        print('Model #{}\nBest roc_auc: {}\nBest params: {}\n'.format(
            len(all_models),
            np.round(opt.best_score_, 4),
            param))

    opt.fit(X_train, y_train, callback=status_print)

    print("val. score: %s" % opt.best_score_)
    print("test score: %s" % opt.score(X_test, y_test))
def roi_space() -> List[Dimension]:
    """
    Build the ROI search space.

    Produces the dimensions searched for each ROI step. The ranges are
    adaptive: they are derived from the ticker interval in use, so the
    hyperspace scales with the timeframe. Freqtrade uses this method by
    default when no custom roi_space method is defined.
    """
    # Default scaling coefficients for the roi hyperspace. Raise them for
    # wider ranges in the roi hyperspace, lower them for narrower ranges.
    roi_t_alpha = 1.0
    roi_p_alpha = 1.0

    timeframe_min = timeframe_to_minutes(IHyperOpt.ticker_interval)

    # Limits for the ROI space parameters are adapted to the timeframe:
    #
    # * 'roi_t' (time interval limits in the ROI tables) scales linearly.
    # * 'roi_p' (ROI value step limits) scales logarithmically.
    #
    # Both scales equal 1 for the 5m ticker interval, which reproduces the
    # legacy Freqtrade roi_space() ranges.
    roi_t_scale = timeframe_min / 5
    roi_p_scale = math.log1p(timeframe_min) / math.log1p(5)

    # Unscaled (min, max) bounds per parameter, in output order.
    time_bounds = {'roi_t1': (10, 120), 'roi_t2': (10, 60), 'roi_t3': (10, 40)}
    value_bounds = {
        'roi_p1': (0.01, 0.04),
        'roi_p2': (0.01, 0.07),
        'roi_p3': (0.01, 0.20),
    }

    roi_limits = {}
    for key, (lower, upper) in time_bounds.items():
        roi_limits[f'{key}_min'] = int(lower * roi_t_scale * roi_t_alpha)
        roi_limits[f'{key}_max'] = int(upper * roi_t_scale * roi_t_alpha)
    for key, (lower, upper) in value_bounds.items():
        roi_limits[f'{key}_min'] = lower * roi_p_scale * roi_p_alpha
        roi_limits[f'{key}_max'] = upper * roi_p_scale * roi_p_alpha
    logger.debug(f"Using roi space limits: {roi_limits}")

    parameter_names = [*time_bounds, *value_bounds]

    # Log the ROI tables generated at both extremes of the space.
    lowest = {key: roi_limits[f'{key}_min'] for key in parameter_names}
    logger.info(f"Min roi table: {round_dict(IHyperOpt.generate_roi_table(lowest), 5)}")
    highest = {key: roi_limits[f'{key}_max'] for key in parameter_names}
    logger.info(f"Max roi table: {round_dict(IHyperOpt.generate_roi_table(highest), 5)}")

    time_dimensions = [
        Integer(roi_limits[f'{key}_min'], roi_limits[f'{key}_max'], name=key)
        for key in time_bounds
    ]
    value_dimensions = [
        Real(roi_limits[f'{key}_min'], roi_limits[f'{key}_max'], name=key)
        for key in value_bounds
    ]
    return time_dimensions + value_dimensions
class BaseModelConfig(metaclass=ABCMeta):
    """Base class for each account model.

    Implements generic default configuration and pipeline steps that can be overridden and extended in sub-classes.
    Runtime state should be managed via :class:`~forecasting_platform.services.Services` and not as instance
    attributes.

    Any :func:`~abc.abstractmethod` must be defined in the account model config classes, therefore these abstract
    methods are the minimum required configuration for a new account model.

    Args:
        runtime_config: All configuration of the current run.
        data_loader: Service for loading data.
    """

    def __init__(self, runtime_config: RuntimeConfig, data_loader: DataLoader):
        self._runtime_config = runtime_config
        self._data_loader = data_loader

        # Output location of this run; the last path component concatenates run timestamp,
        # prediction month, test/predict periods and the grouping columns.
        self.forecast_path = absolute_path(
            runtime_config.output_path
            / "08 Predictions"
            / f"Forecast {self.MODEL_NAME}"
            / f"{runtime_config.prediction_month.strftime(PREDICTION_MONTH_FORMAT)}"
            / f"{runtime_config.run_timestamp}"
            f"_{runtime_config.prediction_month.strftime(PREDICTION_MONTH_FORMAT)}"
            f"_T{runtime_config.test_periods}"
            f"_P{runtime_config.predict_periods}"
            f"_{'_'.join(self.GROUPING)}")

    def __str__(self) -> str:
        return f"{self.MODEL_NAME}: {self.forecast_path}"

    @property
    @abstractmethod
    def MODEL_NAME(self) -> str:
        """Model identifier used to reference this model in created files and log messages."""
        ...

    @property
    @abstractmethod
    def CONTRACTS(self) -> List[str]:
        """List of contracts to be included in the model."""
        ...

    @property
    def model_data_query(self) -> Any:
        """Database query used to split training data for this model."""
        return CleanedData.c.Contract_ID.in_(self.CONTRACTS)

    @property
    @abstractmethod
    def TRAINING_START(self) -> pd.Timestamp:
        """Account-specific :class:`pandas.Timestamp` to include training data newer than this month."""
        ...

    @property
    def GROUPING(self) -> List[str]:
        """Define the granularity of aggregations and the identifier/index level for this model."""
        return ["Contract_ID", "Item_ID"]

    @property
    @abstractmethod
    def POSTPROCESS_DEPTH(self) -> int:
        """Consider these number of months when performing certain postprocessing steps."""
        ...

    @property
    @abstractmethod
    def DEFAULT_FEATURES(self) -> List[Feature]:
        """Create the list of account-specific features with :func:`~owforecasting.features.default_features`."""
        ...

    HYPER_SPACE: List[Dimension] = [
        Integer(5, 30, name="max_depth"),
        Real(1e-5, 1e-1, name="learn_rate", prior="log-uniform"),
        Real(0.9, 0.9999, name="learn_rate_annealing"),
        Integer(10, 25, name="min_rows"),
        Integer(50, 200, name="ntrees"),
        Real(1e-4, 1e-2, name="stopping_tolerance"),
        Integer(2, 10, name="stopping_rounds"),
        Real(1e-1, 1, name="sample_rate"),
        Real(0.99, 1, name="col_sample_rate"),
        Integer(100, 500, name="nbins"),
        Real(1e-10, 1e-3, name="min_split_improvement"),
        Integer(10, 11, name="nfolds"),
    ]

    #: Apply weighting feature to training data. Disable this feature, when set to 0.
    WEIGHTING: Optional[int] = None

    PREPROCESS_OUTLIERS = False
    PREPROCESS_UNIT_COST_AGGREGATION = "mean"

    EXCLUDE_PROJECTS: List[str] = []
    ONLY_INCLUDE_PROJECTS: List[str] = []
    EXCLUDE_ITEMS: List[int] = []
    ONLY_INCLUDE_ITEMS: List[int] = []

    SALES_MIN_PERIOD = 5

    DEFAULT_HYPER_PARAMS = {
        "weights_column": "W_Weight",
        "score_each_iteration": True,  # Avoid non-deterministic behavior
        "score_tree_interval": 1,  # Avoid non-deterministic behavior,
        # see http://docs.h2o.ai/h2o/latest-stable/h2o-docs/data-science/algo-params/score_tree_interval.html
    }

    #: Number of points to evaluate by :func:`~owforecasting.models.optimize_bayes`.
    OPTIMIZE_HYPER_PARAMETERS_N_CALLS = 20

    _OPTIMIZE_BAYES_GP_ARGS = None  # Only exists so it can be changed to speed-up integration tests

    @property
    def OVERRIDE_HYPER_PARAMS(self) -> Dict[str, object]:
        """Set this to extend or overwrite values from DEFAULT_HYPER_PARAMS."""
        return {}

    def configure_features(
        self, cleaned_data_run_id: int
    ) -> Tuple[InternalFeatures, ExogenousFeatures]:
        """Override this to configure account-specific internal or exogenous features, e.g. build-rates.

        Args:
            cleaned_data_run_id: ID of the run that created the cleaned data set in the internal database.

        Returns:
            Tuple of internal and exogenous features; both are empty in this default implementation.
        """
        return {}, {}

    def calculate_weights(self) -> Weights:
        """Override this to configure account-specific weighting functions.

        The default implementation requires ``WEIGHTING`` to be set (asserted) and delegates to
        ``calculate_default_weights`` with ``TRAINING_START``, the configured forecast end and ``WEIGHTING``.
        """
        assert self.WEIGHTING is not None, f"Must define WEIGHTING for default calculate_weights function on {self}"
        return calculate_default_weights(self.TRAINING_START, self._runtime_config.forecast_end, self.WEIGHTING)

    def preprocess_account_data(
        self, sales_raw: pd.DataFrame, grouping: List[str], internal_features: InternalFeatures
    ) -> pd.DataFrame:
        """Preprocess account data by filtering and aggregating the raw sales data and adding internal features.

        Args:
            sales_raw: Raw sales data to pre-process.
            grouping: Grouping columns to use.
            internal_features: Internal features to add.

        Returns:
            :class:`~pandas.DataFrame` with pre-processing applied.
        """
        sales = filter_sales(
            sales_raw,
            exclude_projects=self.EXCLUDE_PROJECTS,
            only_include_projects=self.ONLY_INCLUDE_PROJECTS,
            exclude_items=self.EXCLUDE_ITEMS,
            only_include_items=self.ONLY_INCLUDE_ITEMS,
        )

        sales = add_sales_features(sales, internal_features)

        return group_sales(sales, grouping, unit_cost_aggregation=self.PREPROCESS_UNIT_COST_AGGREGATION)

    def prepare_training_data(
        self,
        sales: pd.DataFrame,
        grouping: List[str],
        exo_features: ExogenousFeatures,
    ) -> TimeSeries:
        """Prepare sales data for training by configuring :class:`~owforecasting.timeseries.TimeSeries`.

        This includes selection of training data from all sales and weighting of provided exogenous features.
        The weighting feature is not passed in; it is obtained from :meth:`calculate_weights`.

        Args:
            sales: Sales data to prepare for training.
            grouping: Grouping columns to use for the time series.
            exo_features: Exogenous features to add to the data.

        Returns:
            Prepared training data.

        Raises:
            ConfigurationException: If no training data is left after pre-processing the grouped sales.
        """
        sales_train = preprocess_grouped_sales(
            sales,
            self._runtime_config.forecast_start,
            self._runtime_config.test_periods,
            self.TRAINING_START,
            grouping,
            sales_min_period=self.SALES_MIN_PERIOD,
        )

        if sales_train.empty:
            raise ConfigurationException(
                f"Not enough training data for {self}. "
                "Please check configuration of --forecast-periods and TRAINING_START."
            )

        if self.PREPROCESS_OUTLIERS:
            # Temporary, non-standardized time series whose data is fed to the outlier pre-processing.
            ts_tmp = TimeSeries(
                sales_train,
                datetime_col="Date",
                response_col="Order_Quantity",
                grouping_cols=grouping,
                granularity=Granularity.MONTH,
                test_periods=self._runtime_config.test_periods,
                predict_periods=self._runtime_config.predict_periods,
                standardize_response=False,
            )

            # NOTE(review): this uses self.GROUPING while every other step uses the ``grouping`` argument;
            # confirm whether that is intentional.
            sales_train = preprocess_outliers(
                ts_tmp._data, self.GROUPING, self._runtime_config.forecast_start
            ).reset_index(drop=True)

        # Create TimeSeries
        ts = TimeSeries(
            sales_train,
            datetime_col="Date",
            response_col="Order_Quantity",
            grouping_cols=grouping,
            granularity=Granularity.MONTH,
            test_periods=self._runtime_config.test_periods,
            predict_periods=self._runtime_config.predict_periods,
        ).add_features(self.DEFAULT_FEATURES)

        # Add exogenous features; they are registered by position ("EXO0", "EXO1", ...), not by their own name.
        for i, (_name, (exo, default)) in enumerate(exo_features.items()):
            ts.add_exogenous_feature(f"EXO{i}", exo, default)

        # Add weighting
        weights = self.calculate_weights()
        ts.add_exogenous_feature("W", weights)

        return ts

    def postprocess_forecast(
        self, ts: TimeSeries, ts_pred: TimeSeries, sales: pd.DataFrame, grouping: List[str]
    ) -> pd.DataFrame:
        """Postprocess account forecast, including filtering and grouping.

        This default implementation only uses ``ts_pred`` and ``grouping``; ``ts`` and ``sales`` are accepted
        but not used here.

        Args:
            ts: :class:`~owforecasting.timeseries.TimeSeries` containing actual values.
            ts_pred: :class:`~owforecasting.timeseries.TimeSeries` containing predicted values.
            sales: :class:`~pandas.DataFrame` with actual sales data.
            grouping: Grouping columns to use.

        Returns:
            Post-processed forecast.
        """
        return postprocess_forecast_results(
            ts_pred.result_data, grouping, self._runtime_config.forecast_start, self.POSTPROCESS_DEPTH)
Categorical([1, 10, 20, None], name='max_depth'), Categorical(['balanced', 'balanced_subsample'], name='class_weight'), Categorical([200, 400, 800, 1600, 3200], name='n_estimators'), Categorical(['auto', 'log2'], name='max_features'), Categorical([True, False], name='bootstrap'), Integer(2, 10, name='min_samples_split'), Integer(1, 5, name='min_samples_leaf') ] lgbm_space = [ Categorical([1, 5, 10, 15, 20, -1], name='max_depth'), Integer(5, 50, name='num_leaves'), Categorical(['gbdt', 'dart'], name='boosting_type'), Categorical([True, False], name='is_unbalance'), Categorical([200, 400, 800, 1600, 3200], name='n_estimators'), Real(0.01, 0.5, name='learning_rate'), Integer(25, 500, name='max_bin'), Integer(10, 50, name='min_data_in_leaf') ] svm_space = [ Categorical(['optimal', 'invscaling'], name='learning_rate'), Integer(500, 100000, name='max_iter'), Categorical([ 'hinge', ], name='loss'), Categorical(['l2', 'elasticnet'], name='penalty'), Real(0.00001, 0.001, name='eta0'), Real(0.00001, 0.001, name='alpha'), Categorical(['balanced', None], name='class_weight'), Categorical([False, 5, 10, 20, True], name='average') ]
EncodingDecodingNeuronLayer.get_simple_constructor( epsilons=epsilons, lambdas=lambdas, quantizer=quantizer)) N_PARAMETER_SEARCH_EPOCHS = 1 # ====================================================================================================================== # Try best parameters from demo_mnist_quantized_find_best_params X = X_1hid_quantized_scheduled.add_config_root_variant( 'poly_schedule', epsilons=lambda eps_init, eps_exp: f'{eps_init}/t**{eps_exp}', lambdas=lambda lambda_init, lambda_exp: f'{lambda_init}/t**{lambda_exp}') X.add_root_variant(n_epochs=N_PARAMETER_SEARCH_EPOCHS, epoch_checkpoint_period=None, quantizer='sigma_delta').add_parameter_search( space=dict(eps_init=Real(0, 1, 'uniform'), eps_exp=Real(0, 1, 'uniform'), lambda_init=Real(0, 1, 'uniform'), lambda_exp=Real(0, 1, 'uniform')), scalar_func=lambda result: result[-1, 'val_error'], n_calls=500) X = X_3hid_quantized_scheduled.add_config_root_variant( 'poly_schedule', epsilons=lambda eps_init, eps_exp: f'{eps_init}/t**{eps_exp}', lambdas=lambda lambda_init, lambda_exp: f'{lambda_init}/t**{lambda_exp}') X_3hid_paramsearch_base = X.add_root_variant( n_epochs=N_PARAMETER_SEARCH_EPOCHS, epoch_checkpoint_period=None, quantizer='sigma_delta') for X in (X_3hid_paramsearch_base, X_3hid_paramsearch_base.add_root_variant(n_negative_steps=100,
settings['out_path_process'] = "/mnt/Datos/BML_CNCRS/Spoc/ECoG_STN/" settings['frequencyranges'] = [[4, 8], [8, 12], [13, 20], [20, 35], [13, 35], [60, 80], [90, 200], [60, 200]] settings['seglengths'] = [1, 2, 2, 3, 3, 3, 10, 10, 10] # settings['num_patients']=['000', '004', '005', '007', '008', '009', '010', '013', '014'] settings['num_patients'] = [ '000', '001', '004', '005', '006', '007', '008', '009', '010', '013', '014' ] settings['BIDS_path'] = settings['BIDS_path'].replace("\\", "/") settings['out_path'] = settings['out_path'].replace("\\", "/") #%% space_LM = [ Real(1e-6, 1, "uniform", name='alpha'), Real(1e-6, 1, "uniform", name='l1_ratio') ] #%% def func(y, time_stamps=5): y_ = y.copy() # y_[:time_stamps]=np.zeros((time_stamps,1)) return y_[time_stamps:] def inverse_func(x, time_stamps=5): x_ = x.copy() x_ = np.vstack((np.zeros((time_stamps, 1)), x))
"""
Code for a gender balanced explicit ALS algorithm.
"""

from skopt.space import Integer, Real
from lenskit.algorithms.als import BiasedMF
from bookgender.rerank.GenderCalibratedRecommender import GenderCalibratedRecommender
from bookgender.rerank.fastForceGenderBalanceRecommender import FastForceGenderBalanceRecommender
from bookgender.rerank.rerankUtil import getBookGender
from bookgender.rerank.slowForceGenderBalanceRecommender import SlowForceGenderBalanceRecommender
from bookgender.rerank.slowForceGenderTargetRecommender import SlowForceGenderTargetRecommender

# Search space, in the order unpacked by ``instantiate``:
# feature count, user regularization, item regularization, damping.
dimensions = [
    Integer(5, 300),
    Real(1.0e-6, 5),
    Real(1.0e-6, 5),
    Real(0, 25),
]


def instantiate(opts, implicit):
    """Build the gender-calibrated recommender for one point of the search space.

    ``opts`` is unpacked as ``(features, user_reg, item_reg, damping)`` and
    configures the wrapped ``BiasedMF``; ``implicit`` is accepted but not
    used by this function.
    """
    feats, ureg, ireg, damp = opts
    base_model = BiasedMF(feats, reg=(ureg, ireg), damping=damp)
    # Disabled alternative kept from the original:
    # return SlowForceGenderTargetRecommender(BiasedMF(feats, reg=(ureg, ireg), damping=damp), getBookGender())
    return GenderCalibratedRecommender(base_model, getBookGender(), 0.5)


update = None

# One BiasedMF per feature count 25, 50, ..., 225.
sweep_points = list(map(BiasedMF, range(25, 250, 25)))
sweep_attrs = ['features']
class LightGBMRegressor(Estimator):
    """LightGBM Regressor"""
    name = "LightGBM Regressor"
    hyperparameter_ranges = {
        "learning_rate": Real(0.000001, 1),
        "boosting_type": ["gbdt", "dart", "goss", "rf"],
        "n_estimators": Integer(10, 100),
        "max_depth": Integer(0, 10),
        "num_leaves": Integer(2, 100),
        "min_child_samples": Integer(1, 100),
        "bagging_fraction": Real(0.000001, 1),
        "bagging_freq": Integer(0, 1)
    }
    model_family = ModelFamily.LIGHTGBM
    supported_problem_types = [ProblemTypes.REGRESSION]

    SEED_MIN = 0
    SEED_MAX = SEED_BOUNDS.max_bound

    def __init__(self, boosting_type="gbdt", learning_rate=0.1, n_estimators=20, max_depth=0, num_leaves=31,
                 min_child_samples=20, n_jobs=-1, random_state=None, random_seed=0,
                 bagging_fraction=0.9, bagging_freq=0, **kwargs):
        """Create the component and its underlying ``LGBMRegressor``.

        The named arguments (plus any ``kwargs``) are stored as the
        component parameters. A copy of them, adjusted for LightGBM's
        per-boosting-type constraints, is what is passed to ``LGBMRegressor``.
        ``random_state`` is resolved against ``random_seed`` via
        ``deprecate_arg``.
        """
        random_seed = deprecate_arg("random_state", "random_seed", random_state, random_seed)

        parameters = {"boosting_type": boosting_type,
                      "learning_rate": learning_rate,
                      "n_estimators": n_estimators,
                      "max_depth": max_depth,
                      "num_leaves": num_leaves,
                      "min_child_samples": min_child_samples,
                      "n_jobs": n_jobs,
                      "bagging_freq": bagging_freq,
                      "bagging_fraction": bagging_fraction}
        parameters.update(kwargs)
        # Adjust a copy only, so the reported parameters stay as the caller gave them.
        lg_parameters = copy.copy(parameters)
        # when boosting type is random forest (rf), LightGBM requires bagging_freq == 1 and 0 < bagging_fraction < 1.0
        if boosting_type == "rf":
            lg_parameters['bagging_freq'] = 1
        # when boosting type is goss, LightGBM requires bagging_fraction == 1
        elif boosting_type == "goss":
            lg_parameters['bagging_fraction'] = 1
        # avoid lightgbm warnings having to do with parameter aliases
        if lg_parameters['bagging_freq'] is not None or lg_parameters['bagging_fraction'] is not None:
            lg_parameters.update({'subsample': None, 'subsample_freq': None})

        lgbm_error_msg = "LightGBM is not installed. Please install using `pip install lightgbm`."
        lgbm = import_or_raise("lightgbm", error_msg=lgbm_error_msg)
        self._ordinal_encoder = None

        lgbm_regressor = lgbm.sklearn.LGBMRegressor(random_state=random_seed, **lg_parameters)

        super().__init__(parameters=parameters,
                         component_obj=lgbm_regressor,
                         random_seed=random_seed)

    def _encode_categories(self, X, fit=False):
        """Encodes each categorical feature using ordinal encoding.

        Args:
            X: Input features; converted with ``infer_feature_types``.
            fit: When True, record the input feature names and fit a new
                ``OrdinalEncoder``; otherwise reuse the encoder fitted earlier.

        Returns:
            The features with columns renamed to numeric names and the
            categorical columns ordinal-encoded and cast to ``category``.
        """
        X = infer_feature_types(X)
        cat_cols = list(X.select('category').columns)
        X = _convert_woodwork_types_wrapper(X.to_dataframe())
        if fit:
            self.input_feature_names = list(X.columns)
        X_encoded = _rename_column_names_to_numeric(X)
        # Translate the categorical column names to their renamed counterparts.
        rename_cols_dict = dict(zip(X.columns, X_encoded.columns))
        cat_cols = [rename_cols_dict[col] for col in cat_cols]

        if not cat_cols:
            return X_encoded
        if fit:
            self._ordinal_encoder = OrdinalEncoder()
            encoder_output = self._ordinal_encoder.fit_transform(X_encoded[cat_cols])
        else:
            encoder_output = self._ordinal_encoder.transform(X_encoded[cat_cols])

        # Give the encoded values the same index as X_encoded: assignment
        # aligns on the index, so a fresh 0..n-1 index would produce NaNs
        # whenever the input is not indexed 0..n-1.
        X_encoded[cat_cols] = pd.DataFrame(encoder_output, index=X_encoded.index)
        X_encoded[cat_cols] = X_encoded[cat_cols].astype('category')
        return X_encoded

    def fit(self, X, y=None):
        """Encode the categorical features of ``X`` and fit the LightGBM model.

        Returns:
            self
        """
        X_encoded = self._encode_categories(X, fit=True)
        if y is not None:
            y = infer_feature_types(y)
            y = _convert_woodwork_types_wrapper(y.to_series())
        self._component_obj.fit(X_encoded, y)
        return self

    def predict(self, X):
        """Encode ``X`` with the already-fitted encoder and return the parent class's prediction."""
        X_encoded = self._encode_categories(X)
        return super().predict(X_encoded)
import pytest
from skopt.space import Real, Integer, Categorical

from src.utilities import conf_to_params, bayesianMLP_param_space


@pytest.mark.parametrize(
    "c, expected",
    [
        # Real dimension
        (
            dict(name="learning_rate", type="Real", low=0.1, high=0.2),
            Real(low=0.1, high=0.2, prior='uniform', transform='identity'),
        ),
        # Integer dimension
        (
            dict(name="num_hidden_layer", type="Integer", low=0, high=3),
            Integer(low=0, high=3),
        ),
        # Categorical dimension
        (
            dict(name="categorical", type="Categorical", categories=[0, 1, 2]),
            Categorical([0, 1, 2], name="categorical"),
        ),
    ])
def test_conf_to_params(c, expected):
    """Each configuration dict must convert to the expected skopt dimension."""
    converted = conf_to_params(c)
    assert converted == expected


# def test_bayesianMLP_param_space():
#     BOconfig = {
#         "nBayesianOptCall": 10,
def test_normalize():
    """Check the "normalize" transform of Real and Integer dimensions.

    For a uniform Real, a log-uniform Real and an Integer dimension, verify
    that samples stay within the bounds, that transformed values lie in
    [0, 1], and that inverse_transform recovers the original values.
    """
    a = Real(2.0, 30.0, transform="normalize")
    # check_limits is called directly rather than yielded: a test function
    # containing ``yield`` is a generator, and pytest (>= 4.0) does not run
    # generator tests, so none of the assertions below would execute.
    for i in range(50):
        check_limits(a.rvs(random_state=i), 2, 30)

    rng = np.random.RandomState(0)
    X = rng.randn(100)
    # Rescale the samples so that they span exactly [2, 30].
    X = 28 * (X - X.min()) / (X.max() - X.min()) + 2

    # Check transformed values are in [0, 1]
    assert np.all(a.transform(X) <= np.ones_like(X))
    assert np.all(np.zeros_like(X) <= a.transform(X))

    # Check inverse transform
    assert_array_almost_equal(a.inverse_transform(a.transform(X)), X)

    # log-uniform prior
    a = Real(10**2.0, 10**4.0, prior="log-uniform", transform="normalize")
    for i in range(50):
        check_limits(a.rvs(random_state=i), 10**2, 10**4)

    rng = np.random.RandomState(0)
    X = np.clip(10**3 * rng.randn(100), 10**2.0, 10**4.0)

    # Check transform
    assert np.all(a.transform(X) <= np.ones_like(X))
    assert np.all(np.zeros_like(X) <= a.transform(X))

    # Check inverse transform
    assert_array_almost_equal(a.inverse_transform(a.transform(X)), X)

    a = Integer(2, 30, transform="normalize")
    for i in range(50):
        check_limits(a.rvs(random_state=i), 2, 30)
    assert_array_equal(a.transformed_bounds, (0, 1))

    # A single integer drawn from [2, 30] (randint's upper bound is exclusive).
    X = rng.randint(2, 31)

    # Check transformed values are in [0, 1]
    assert np.all(a.transform(X) <= np.ones_like(X))
    assert np.all(np.zeros_like(X) <= a.transform(X))

    # Check inverse transform
    X_orig = a.inverse_transform(a.transform(X))
    assert X_orig.dtype == "int64"
    assert_array_equal(X_orig, X)
# parametric tuning for info-detection
from skopt.space import Real, Integer
from skopt.utils import use_named_args
from skopt import gp_minimize
from util import TPR_TNR
from util import Lymphography
from info_detection import InfoOutlierDetector

# Single search dimension: the detector's `gamma` keyword argument.
SPACE = [
    Real(0.01, 0.2, prior='uniform', name='gamma'),
]
data, labels = Lymphography()


@use_named_args(SPACE)
def objective(**params):
    """Score one point of SPACE on the module-level `data` / `labels`.

    Fits an InfoOutlierDetector (laplacian affinity) with the given params and
    returns 10 when the true positive rate is below 0.9, otherwise
    0.9 - tpr - tnr.
    """
    detector = InfoOutlierDetector(affinity='laplacian', **params)
    predicted = detector.fit_predict(data)
    tpr, tnr = TPR_TNR(labels, predicted)
    # Flat penalty for any configuration with a true positive rate under 0.9.
    return 10 if tpr < 0.9 else 0.9 - tpr - tnr


if __name__ == '__main__':
    res_gp = gp_minimize(
        func=objective,
        dimensions=SPACE,
        n_calls=40,
        random_state=0,
    )
    print(res_gp)
def param_search_and_cross_validation(
        num_estimators=5000,
        early_stopping_rounds=15,
        data_train=data_train,
        data_train__target=data_train__target,
        data_test=data_test,
        data_test__target=data_test__target,
        data_eval=data_eval,
        data_eval__target=data_eval__target,
        num_cv_folds=num_cv_folds,
        n_calls=30,
        random_state=215235):
    """
    Search LightGBM hyperparameters with scikit-optimize's `gp_minimize()`.

    Each candidate set of hyperparameters is scored by the mean log loss of an
    `LGBMClassifier` cross-validated on the training data; the eval data and
    `early_stopping_rounds` are forwarded to each fit.

    Note that the data defaults are bound to the module-level objects of the
    same names when this function is defined.

    Parameters
    ----------
    num_estimators : number of boosted trees (`n_estimators`) to build
    early_stopping_rounds : forwarded to each fit as `early_stopping_rounds`
    data_train, data_train__target : data and target given to `cross_val_score()`
    data_test, data_test__target : accepted but not used by this function
    data_eval, data_eval__target : forwarded to each fit as `eval_set`
    num_cv_folds : number of cross validation folds (`cv`)
    n_calls : number of calls `gp_minimize()` makes against the objective
    random_state : seed given to `gp_minimize()` for reproducible results

    Returns
    -------
    The object returned by `gp_minimize()` (an OptimizeResult, see
    https://scikit-optimize.github.io/#skopt.gp_minimize).
    """
    # Additional params to consider if models overfit
    #   Learning params
    #     early_stopping_rounds : will stop training if one metric of one
    #       validation data doesn't improve in last early_stopping_round rounds
    #   IO params
    #     max_bin : (default is 255, decrease the number to mitigate
    #       overfitting. Risks drop in accuracy)

    hyperparameters = [
        # max number of leaves in one tree
        Integer(4, 96, name='num_leaves'),
        # max depth of an individual stump
        Integer(3, 5, name='max_depth'),
        # booster's learning rate, log-uniform prior over [1/(2^8), 1/(2^2)]
        Real(2**-8, 2**-2, 'log-uniform', name='learning_rate'),
        # minimal number of data in one leaf
        Integer(2, 96, name='min_data_in_leaf'),
        # will randomly select part of data without resampling
        Real(.5, 1.0, 'uniform', name='bagging_fraction'),
        # will select n% of features w/o resampling before training each stump
        Real(0.5, 1.0, 'uniform', name='feature_fraction'),
        # subsample ratio of columns when constructing each tree
        Real(0.3, 1.0, "uniform", name='colsample_bytree'),
        # max number of bins that feature values will be bucketed in
        Integer(25, 150, name='max_bin'),
    ]

    clf = lgb.LGBMClassifier(
        n_estimators=num_estimators,  # number of boosted trees to build
        objective='binary',
        silent=False,
        importance_type='gain',  # gain in _some metric_ when a feature is included
        seed=12759081,  # setting this, but underlying C++ seeds may overwrite
        num_threads=4,  # number of real CPUs available on the playground machine
        # uses the values of y to automatically adjust weights inversely
        # proportional to class frequencies in the input data
        class_weight='balanced')

    # Per the scikit-optimize docs https://scikit-optimize.github.io/#skopt.gp_minimize :
    # `use_named_args` lets the optimizer, which passes a single list of
    # values, call an objective that takes the dimensions as named arguments.
    @use_named_args(hyperparameters)
    def objective_fxn(**params):
        """
        Cross-validates `clf` with the given hyperparameters over
        `num_cv_folds` folds of the training data and returns the mean
        log loss.

        Leveraging SKLearn's `cross_val_score()` method for this
        """
        print('Training a model using the following params:')
        print(params)

        clf.set_params(**params)

        # Using cross_val_score() for now, but consider using cross_validate() for more info in a later iteration
        # https://scikit-learn.org/stable/modules/cross_validation.html#cross-validation
        # Scoring docs also available at link above
        # 'neg_log_loss' is negated so that lower values are better.
        cv_score_scores__mean = -1.0 * cross_val_score(
            clf,  # model
            data_train,  # data to fit a model to
            data_train__target,  # target variable
            cv=num_cv_folds,  # number of folds to iterate over
            scoring='neg_log_loss',
            fit_params={
                'early_stopping_rounds': early_stopping_rounds,
                'eval_set': (data_eval, data_eval__target),
                'verbose': True
            }).mean()

        print(
            'Cross Validation Complete on Hyperparam Permutation\nMean Log Loss: {0}'
            .format(cv_score_scores__mean))

        print('Logging results')
        # These containers are not defined in this function; they are
        # resolved from the enclosing module scope.
        num_leaves.append(params['num_leaves'])
        max_depth.append(params['max_depth'])
        learning_rate.append(params['learning_rate'])
        min_data_in_leaf.append(params['min_data_in_leaf'])
        bagging_fraction.append(params['bagging_fraction'])
        feature_fraction.append(params['feature_fraction'])
        colsample_bytree.append(params['colsample_bytree'])
        max_bins.append(params['max_bin'])
        cross_val_score__mean.append(cv_score_scores__mean)
        table_record_udpate_timestamp.append(datetime.now())

        return cv_score_scores__mean

    # Leveraging SK Opt's Gaussian process Bayesian Optimization `gp_minimize()` method to approximate the 'best params' to
    # use in a final model
    # https://scikit-optimize.github.io/#skopt.gp_minimize
    # Method returns an OptimizeResult object. See link above for full docs on all the data returned
    # NOTE(review): the message below says "Random Search" although the search
    # is driven by gp_minimize(); the runtime string is left unchanged.
    print(
        'Starting Cross Validation via Random Search, with early stopping in place'
    )
    gaussian_process_results_array = gp_minimize(
        objective_fxn,
        hyperparameters,  # list of search space dimensions
        n_calls=n_calls,  # number of calls to make against the objective function
        random_state=random_state  # seeding the optimizer for reproducible results
    )

    return gaussian_process_results_array
def stoploss_space() -> List[Dimension]:
    """Return the stoploss search space: a single Real dimension named
    'stoploss' built from the bounds -0.05 and -0.01."""
    stoploss_dimension = Real(-0.05, -0.01, name='stoploss')
    return [stoploss_dimension]
nr_fold = 5 random_state = 42 train = pd.read_csv('../input/train_input_2yaM34J.csv', parse_dates=['Date']) y = pd.read_csv('../input/train_output_2kCtjpF.csv')['Score'] train = get_dates(train) train = train.drop(['ID', 'Date'], axis=1) space = [ Integer(3, 200, name='max_depth'), Integer(2, 2056, name='num_leaves'), Integer(3, 200, name='min_child_samples'), Real(0.2, 0.90, name='subsample'), Real(0.2, 0.90, name='colsample_bytree'), Real(0.001, 0.2, name='learning_rate'), Real(0.0001, 100, name='reg_alpha'), Real(0.0001, 100, name='reg_lambda'), Integer(2, 1000, name='min_child_weight'), Real(0.001, 1, name='min_split_gain'), Categorical(['gbdt', 'dart', 'goss'], name='boosting_type') ] def objective(values): params = { 'device': 'cpu', 'objective': 'multiclass', 'num_class': 5,
# -3.0 -> 0.001
def pow10map(x):
    """Return ten raised to the power ``x`` (e.g. -3.0 -> 0.001)."""
    return 10.0 ** x


def pow2intmap(x):
    """Return two raised to the power ``x``, truncated to an int."""
    power = 2.0 ** x
    return int(power)


def nop(x):
    """Return ``x`` unchanged."""
    return x


# Each entry maps a parameter name to a pair: (search dimension, function
# applied to a sampled value).
nnparams = dict(
    # up to 1024 neurons
    hidden_layer_sizes=(Real(1.0, 10.0), pow2intmap),
    activation=(Categorical(['identity', 'logistic', 'tanh', 'relu']), nop),
    solver=(Categorical(['lbfgs', 'sgd', 'adam']), nop),
    alpha=(Real(-5.0, -1), pow10map),
    batch_size=(Real(5.0, 10.0), pow2intmap),
    learning_rate=(Categorical(['constant', 'invscaling', 'adaptive']), nop),
    max_iter=(Real(5.0, 8.0), pow2intmap),
    learning_rate_init=(Real(-5.0, -1), pow10map),
    power_t=(Real(0.01, 0.99), nop),
    momentum=(Real(0.1, 0.98), nop),
    nesterovs_momentum=(Categorical([True, False]), nop),
    beta_1=(Real(0.1, 0.98), nop),
    beta_2=(Real(0.1, 0.9999999), nop),
)
@pytest.mark.fast_test @pytest.mark.parametrize("dimensions, normalizations", [ (((1, 3), (1., 3.)), ('normalize', 'normalize')), (((1, 3), ('a', 'b', 'c')), ('normalize', 'onehot')), ]) def test_normalize_dimensions(dimensions, normalizations): space = normalize_dimensions(dimensions) for dimension, normalization in zip(space, normalizations): assert dimension.transform_ == normalization @pytest.mark.fast_test @pytest.mark.parametrize( "dimension, name", [(Real(1, 2, name="learning rate"), "learning rate"), (Integer(1, 100, name="no of trees"), "no of trees"), (Categorical(["red, blue"], name="colors"), "colors")]) def test_normalize_dimensions(dimension, name): space = normalize_dimensions([dimension]) assert space.dimensions[0].name == name @pytest.mark.fast_test def test_use_named_args(): """ Test the function wrapper @use_named_args which is used for wrapping an objective function with named args so it can be called by the optimizers which only pass a single list as the arg.
verbose=0, class_weight=classWeight, callbacks=[early_stopping, model_checkpoint], validation_split=0.25) Y_predict = model.predict(X_test) fpr, tpr, thresholds = roc_curve(Y_test, Y_predict) roc_auc = auc(fpr, tpr) return roc_auc #best_acc = max(history.history['val_acc']) #return best_acc space = [ Integer(2, 4, name='hidden_layers'), Integer(32, 256, name='initial_nodes'), Real(10**-5, 10**-1, "log-uniform", name='l2_lambda'), Real(0.15, 0.5, name='dropout'), Integer(256, 4096, name='batch_size'), Real(10**-5, 10**-1, "log-uniform", name='learning_rate'), ] @use_named_args(space) def objective(**X): global best_auc global best_config print("New configuration: {}".format(X)) model = build_custom_model(num_hiddens=X['hidden_layers'], initial_node=X['initial_nodes'], dropout=X['dropout'],
"""
Code for optimizing the implicit ALS algorithm.
"""
from skopt.space import Integer, Real
from lenskit.algorithms.als import ImplicitMF

# Search dimensions, in the order instantiate() unpacks them:
# feature count, the two regularization terms, and the weight.
dimensions = [
    Integer(5, 500),
    Real(1.0e-6, 5),
    Real(1.0e-6, 5),
    Real(1, 50),
]


def instantiate(opts, implicit):
    """Build an ImplicitMF from a four-element point ``opts``.

    ``implicit`` is accepted but not used.
    """
    n_features, user_reg, item_reg, conf_weight = opts
    return ImplicitMF(n_features, reg=(user_reg, item_reg), weight=conf_weight)


def default(implicit):
    """Return an ImplicitMF constructed with 50; ``implicit`` is not used."""
    return ImplicitMF(50)


update = None

# One ImplicitMF per feature count 25, 50, ..., 225.
sweep_points = list(map(ImplicitMF, range(25, 250, 25)))
sweep_attrs = ['features']
def test_real():
    """Check sampling, membership and transforms of Real dimensions.

    Covers a default Real and a log-uniform Real: samples must respect the
    bounds, the default transform must leave values untouched, and the
    log-uniform transform must equal log10 and be invertible.
    """
    uniform = Real(1, 25)
    for seed in range(50):
        sample = uniform.rvs(random_state=seed)
        check_limits(sample, 1, 25)
        assert sample in uniform

    samples = uniform.rvs(random_state=0, n_samples=10)
    assert len(samples) == 10
    # Transform and inverse transform both return the values unchanged.
    assert_array_equal(uniform.transform(samples), samples)
    assert_array_equal(uniform.inverse_transform(samples), samples)

    log_uniform = Real(10**-5, 10**5, prior="log-uniform")
    # Same bounds, different prior: the two dimensions must not compare equal.
    assert log_uniform != Real(10**-5, 10**5)
    for seed in range(50):
        log_sample = log_uniform.rvs(random_state=seed)
        check_limits(log_sample, 10**-5, 10**5)

    log_samples = log_uniform.rvs(random_state=0, n_samples=10)
    assert len(log_samples) == 10

    log_transformed = log_uniform.transform(log_samples)
    assert_array_equal(log_transformed, np.log10(log_samples))
    recovered = log_uniform.inverse_transform(log_transformed)
    assert_array_equal(recovered, log_samples)