def _worker(cls, tempdir, task_id, pickled_fn, pickled_args, return_list, gpu_ids, args):
    """Worker function in the client"""
    with open(os.path.join(tempdir, f'{task_id}.out'), 'w') as std_out:
        with open(os.path.join(tempdir, f'{task_id}.err'), 'w') as err_out:
            # Redirect stdout/stderr into files so the main process can read them after the job is completed
            if not is_fork_enabled():
                sys.stdout = std_out
                sys.stderr = err_out

            # Only fork mode allows passing non-picklable objects
            fn = pickled_fn if is_fork_enabled() else dill.loads(pickled_fn)
            args = {**pickled_args, **args} if is_fork_enabled() else {**dill.loads(pickled_args), **args}

            DistributedJobRunner.set_cuda_environment(gpu_ids)

            # Run the task
            try:
                ret = fn(**args)
            except AutoGluonEarlyStop:
                ret = None
            return_list.append(ret)

            sys.stdout.flush()
            sys.stderr.flush()
def _run_dist_job(cls, task_id, fn, args, gpu_ids):
    """Remote function executing the task"""
    if '_default_config' in args['args']:
        args['args'].pop('_default_config')

    if 'reporter' in args:
        local_reporter = LocalStatusReporter()
        dist_reporter = args['reporter']
        args['reporter'] = local_reporter

    manager = mp.Manager()
    return_list = manager.list()

    try:
        # Start the local process.
        # Note: we have to use dill here because every argument passed to a child process over spawn or forkserver
        # has to be pickled. Fork mode does not require this because of memory sharing, but it is unusable for CUDA
        # applications (CUDA does not support fork) and has multithreading issues (hung threads).
        # Usage of decorators makes standard pickling unusable (https://docs.python.org/3/library/pickle.html#what-can-be-pickled-and-unpickled).
        # Dill enables sending of decorated classes. Please note that if some classes are used in the training function,
        # those classes are best defined inside the function - this way they can be constructed 'on the other side'
        # after deserialization.
        pickled_fn = fn if is_fork_enabled() else dill.dumps(fn)

        # The reporter has to be handled separately since it's used for cross-process communication and has to be passed as-is
        args_ = {k: v for (k, v) in args.items() if k not in ['reporter']}
        pickled_args = args_ if is_fork_enabled() else dill.dumps(args_)

        cross_process_args = {k: v for (k, v) in args.items() if k not in ['fn', 'args']}

        with make_temp_directory() as tempdir:
            p = CustomProcess(
                target=partial(cls._worker, tempdir, task_id, pickled_fn, pickled_args),
                args=(return_list, gpu_ids, cross_process_args)
            )
            p.start()
            if 'reporter' in args:
                cp = Communicator.Create(p, local_reporter, dist_reporter)
            p.join()

            # Get the process outputs
            if not is_fork_enabled():
                cls.__print(tempdir, task_id, 'out')
                cls.__print(tempdir, task_id, 'err')
    except Exception as e:
        logger.error('Exception in worker process: {}'.format(e))

    ret = return_list[0] if len(return_list) > 0 else None
    return ret
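# --- Hedged, self-contained sketch (not part of the runner) ---
# Why dill instead of plain pickle for spawn/forkserver: pickle cannot serialize
# closures or decorated callables, so the runner ships them as dill payloads and
# rebuilds them on the other side. make_train_fn and _child below are
# hypothetical and only illustrate the round trip under the 'spawn' context.
import multiprocessing as mp

import dill


def make_train_fn(lr):
    def train_fn(epochs):
        return lr * epochs  # stand-in for a real training loop
    return train_fn


def _child(payload, queue):
    fn = dill.loads(payload)  # reconstruct the closure after deserialization
    queue.put(fn(epochs=3))


if __name__ == '__main__':
    ctx = mp.get_context('spawn')  # spawn/forkserver require picklable arguments
    queue = ctx.Queue()
    payload = dill.dumps(make_train_fn(0.1))  # dill captures the closure by value
    p = ctx.Process(target=_child, args=(payload, queue))
    p.start()
    print(queue.get())  # 0.1 * 3 epochs
    p.join()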
def _preprocess_train(self, X_train, y_train, X_val, y_val, **kwargs):
    from fastai.data_block import FloatList
    from fastai.tabular import TabularList, FillMissing, Categorify, Normalize
    from fastai.core import defaults

    X_train = self.preprocess(X_train, fit=True)
    if X_val is not None:
        X_val = self.preprocess(X_val)

    self.procs = [FillMissing, Categorify, Normalize]

    if self.problem_type == REGRESSION and self.y_scaler is not None:
        y_train_norm = pd.Series(self.y_scaler.fit_transform(y_train.values.reshape(-1, 1)).reshape(-1))
        y_val_norm = pd.Series(self.y_scaler.transform(y_val.values.reshape(-1, 1)).reshape(-1)) if y_val is not None else None
        logger.log(0, f'Training with scaled targets: {self.y_scaler} - !!! NN training metric will be different from the final results !!!')
    else:
        y_train_norm = y_train
        y_val_norm = y_val

    logger.log(15, f'Using {len(self.cont_columns)} cont features')
    df_train, train_idx, val_idx = self._generate_datasets(X_train, y_train_norm, X_val, y_val_norm)
    label_class = FloatList if self.problem_type == REGRESSION else None

    # Additional workers help only when fork is enabled; in other mp modes, communication overhead reduces performance
    num_workers = defaults.cpus if is_fork_enabled() else 0

    # Copy cat_columns and cont_columns because TabularList mutates the lists
    data = (TabularList.from_df(df_train, path=self.path, cat_names=self.cat_columns.copy(), cont_names=self.cont_columns.copy(), procs=self.procs)
            .split_by_idxs(train_idx, val_idx)
            .label_from_df(cols=LABEL, label_cls=label_class)
            .databunch(bs=self.params['bs'] if len(X_train) > self.params['bs'] else 32, num_workers=num_workers))
    return data
def preprocess_train(self, X_train, y_train, X_val, y_val, **kwargs):
    from fastai.data_block import FloatList
    from fastai.tabular import TabularList
    from fastai.core import defaults

    self.cat_columns = self.feature_metadata.get_features(valid_raw_types=[R_OBJECT, R_CATEGORY, R_BOOL])
    self.cont_columns = self.feature_metadata.get_features(valid_raw_types=[R_INT, R_FLOAT, R_DATETIME])

    if self.problem_type == REGRESSION and self.y_scaler is not None:
        y_train_norm = pd.Series(self.y_scaler.fit_transform(y_train.values.reshape(-1, 1)).reshape(-1))
        y_val_norm = pd.Series(self.y_scaler.transform(y_val.values.reshape(-1, 1)).reshape(-1)) if y_val is not None else None
        logger.log(0, f'Training with scaled targets: {self.y_scaler} - !!! NN training metric will be different from the final results !!!')
    else:
        y_train_norm = y_train
        y_val_norm = y_val

    try:
        X_train_stats = X_train.describe(include='all').T.reset_index()
        cat_cols_to_drop = X_train_stats[(X_train_stats['unique'] > self.params.get('max_unique_categorical_values', 10000)) | (X_train_stats['unique'].isna())]['index'].values
    except Exception:
        cat_cols_to_drop = []
    cat_cols_to_keep = [col for col in X_train.columns.values if (col not in cat_cols_to_drop)]
    cat_cols_to_use = [col for col in self.cat_columns if col in cat_cols_to_keep]
    logger.log(15, f'Using {len(cat_cols_to_use)}/{len(self.cat_columns)} categorical features')
    self.cat_columns = cat_cols_to_use
    self.cat_columns = [feature for feature in self.cat_columns if feature in list(X_train.columns)]
    self.cont_columns = [feature for feature in self.cont_columns if feature in list(X_train.columns)]

    for c in self.cat_columns:
        self.columns_fills[c] = MISSING
    for c in self.cont_columns:
        self.columns_fills[c] = X_train[c].mean()
    X_train = self.fill_missing(X_train)

    logger.log(15, f'Using {len(self.cont_columns)} cont features')
    X_train = self.fold_preprocess(X_train, fit=True)
    if X_val is not None:
        X_val = self.fill_missing(X_val)
        X_val = self.fold_preprocess(X_val)

    df_train, train_idx, val_idx = self._generate_datasets(X_train, y_train_norm, X_val, y_val_norm)
    label_class = FloatList if self.problem_type == REGRESSION else None

    # Additional workers help only when fork is enabled; in other mp modes, communication overhead reduces performance
    num_workers = defaults.cpus if is_fork_enabled() else 0

    # Copy cat_columns and cont_columns because TabularList mutates the lists
    data = (TabularList.from_df(df_train, path=self.path, cat_names=self.cat_columns.copy(), cont_names=self.cont_columns.copy(), procs=self.procs)
            .split_by_idxs(train_idx, val_idx)
            .label_from_df(cols=LABEL, label_cls=label_class)
            .databunch(bs=self.params['bs'] if len(X_train) > self.params['bs'] else 32, num_workers=num_workers))
    return data
def preprocess_train(self, X_train, y_train, X_val, y_val, **kwargs):
    from fastai.data_block import FloatList
    from fastai.tabular import TabularList, FillMissing, Categorify, Normalize
    from fastai.core import defaults

    self.cat_columns = X_train.select_dtypes(['category', 'object', 'bool', 'bool_']).columns.values.tolist()
    self.cont_columns = X_train.select_dtypes([
        'float', 'float_', 'float16', 'float32', 'float64',
        'int', 'int_', 'int8', 'int16', 'int32', 'int64',
        'uint8', 'uint16', 'uint32', 'uint64', 'datetime'
    ]).columns.values.tolist()

    if self.problem_type == REGRESSION and self.y_scaler is not None:
        y_train_norm = pd.Series(self.y_scaler.fit_transform(y_train.values.reshape(-1, 1)).reshape(-1))
        y_val_norm = pd.Series(self.y_scaler.transform(y_val.values.reshape(-1, 1)).reshape(-1)) if y_val is not None else None
        logger.log(0, f'Training with scaled targets: {self.y_scaler} - !!! NN training metric will be different from the final results !!!')
    else:
        y_train_norm = y_train
        y_val_norm = y_val

    try:
        X_train_stats = X_train.describe(include='all').T.reset_index()
        cat_cols_to_drop = X_train_stats[(X_train_stats['unique'] > self.params.get('max_unique_categorical_values', 10000)) | (X_train_stats['unique'].isna())]['index'].values
    except Exception:
        cat_cols_to_drop = []
    cat_cols_to_keep = [col for col in X_train.columns.values if (col not in cat_cols_to_drop)]
    cat_cols_to_use = [col for col in self.cat_columns if col in cat_cols_to_keep]
    logger.log(15, f'Using {len(cat_cols_to_use)}/{len(self.cat_columns)} categorical features')
    self.cat_columns = cat_cols_to_use
    self.cat_columns = [feature for feature in self.cat_columns if feature in list(X_train.columns)]
    self.cont_columns = [feature for feature in self.cont_columns if feature in list(X_train.columns)]

    logger.log(15, f'Using {len(self.cont_columns)} cont features')
    X_train = self.fold_preprocess(X_train, fit=True)
    if X_val is not None:
        X_val = self.fold_preprocess(X_val)

    df_train, train_idx, val_idx = self._generate_datasets(X_train, y_train_norm, X_val, y_val_norm)
    label_class = FloatList if self.problem_type == REGRESSION else None
    procs = [FillMissing, Categorify, Normalize]

    # Additional workers help only when fork is enabled; in other mp modes, communication overhead reduces performance
    num_workers = defaults.cpus if is_fork_enabled() else 0

    data = (TabularList.from_df(df_train, path=self.path, cat_names=self.cat_columns, cont_names=self.cont_columns, procs=procs)
            .split_by_idxs(train_idx, val_idx)
            .label_from_df(cols=LABEL, label_cls=label_class)
            .databunch(bs=self.params['bs'] if len(X_train) > self.params['bs'] else 32, num_workers=num_workers))
    return data
def generate_dataset_and_dataloader(self, data_list):
    # Access ith embedding-feature via: self.dataset._data[self.data_desc.index('embed_' + str(i))].asnumpy()
    self.dataset = mx.gluon.data.dataset.ArrayDataset(*data_list)
    self.dataloader = mx.gluon.data.DataLoader(
        self.dataset,
        self.batch_size,
        shuffle=not self.is_test,  # no need to shuffle test data
        last_batch='keep' if self.is_test else 'rollover',
        # local thread version is faster unless fork is enabled
        num_workers=self.num_dataloading_workers if is_fork_enabled() else 0,
        # need to use a thread pool if forkserver is enabled, otherwise the GIL will be locked;
        # please note: this will make training slower
        thread_pool=is_forkserver_enabled(),
    )
def _fit(self, X, y, X_val=None, y_val=None, time_limit=None, num_cpus=None, num_gpus=0, sample_weight=None, **kwargs):
    try_import_fastai()
    import torch
    from fastai.tabular.model import tabular_config
    from fastai.tabular.learner import tabular_learner
    from fastcore.basics import defaults
    from .callbacks import AgSaveModelCallback, EarlyStoppingCallbackWithTimeLimit
    from .quantile_helpers import HuberPinballLoss

    start_time = time.time()
    if sample_weight is not None:  # TODO: support
        logger.log(15, "sample_weight not yet supported for NNFastAiTabularModel, this model will ignore them in training.")

    params = self._get_model_params()

    self.y_scaler = params.get('y_scaler', None)
    if self.y_scaler is None:
        if self.problem_type == REGRESSION:
            self.y_scaler = sklearn.preprocessing.StandardScaler()
        elif self.problem_type == QUANTILE:
            self.y_scaler = sklearn.preprocessing.MinMaxScaler()
    else:
        self.y_scaler = copy.deepcopy(self.y_scaler)

    if num_cpus is None:
        num_cpus = defaults.cpus
    # Additional workers help only when fork is enabled; in other mp modes, communication overhead reduces performance
    num_workers = int(num_cpus / 2)
    if not is_fork_enabled():
        num_workers = 0
    if num_gpus is not None:
        if num_gpus == 0:
            # TODO: Does not obviously impact inference speed
            defaults.device = torch.device('cpu')
        else:
            defaults.device = torch.device('cuda')

    logger.log(15, f'Fitting Neural Network with parameters {params}...')
    data = self._preprocess_train(X, y, X_val, y_val)

    nn_metric, objective_func_name = self.__get_objective_func_name(self.stopping_metric)
    objective_func_name_to_monitor = self.__get_objective_func_to_monitor(objective_func_name)
    objective_optim_mode = np.less if objective_func_name in [
        'log_loss', 'root_mean_squared_error', 'mean_squared_error', 'mean_absolute_error', 'median_absolute_error',  # Regression objectives
        'pinball_loss',  # Quantile objective
    ] else np.greater

    # TODO: calculate max emb concat layer size and use 1st layer as that value and 2nd in between number of classes and the value
    if params.get('layers', None) is not None:
        layers = params['layers']
    elif self.problem_type in [REGRESSION, BINARY]:
        layers = [200, 100]
    elif self.problem_type == QUANTILE:
        base_size = max(len(self.quantile_levels) * 4, 128)
        layers = [base_size, base_size, base_size]
    else:
        base_size = max(data.c * 2, 100)
        layers = [base_size * 2, base_size]

    loss_func = None
    if self.problem_type == QUANTILE:
        loss_func = HuberPinballLoss(self.quantile_levels, alpha=self.params['alpha'])

    best_epoch_stop = params.get("best_epoch", None)  # Use best epoch for refit_full.

    dls = data.dataloaders(bs=self.params['bs'] if len(X) > self.params['bs'] else 32)

    if self.problem_type == QUANTILE:
        dls.c = len(self.quantile_levels)

    self.model = tabular_learner(
        dls, layers=layers, metrics=nn_metric,
        config=tabular_config(ps=params['ps'], embed_p=params['emb_drop']),
        loss_func=loss_func,
    )
    logger.log(15, self.model.model)

    save_callback = AgSaveModelCallback(
        monitor=objective_func_name_to_monitor, comp=objective_optim_mode, fname=self.name,
        best_epoch_stop=best_epoch_stop, with_opt=True
    )

    if time_limit is not None:
        time_elapsed = time.time() - start_time
        time_left = time_limit - time_elapsed
        if time_left <= time_limit * 0.7:  # if more than 30% of the time was spent on preprocessing, there is likely not enough time to train the model
            raise TimeLimitExceeded
    else:
        time_left = None

    early_stopping = EarlyStoppingCallbackWithTimeLimit(
        monitor=objective_func_name_to_monitor,
        comp=objective_optim_mode,
        min_delta=params['early.stopping.min_delta'],
        patience=params['early.stopping.patience'],
        time_limit=time_left,
        best_epoch_stop=best_epoch_stop
    )

    callbacks = [save_callback, early_stopping]

    with make_temp_directory() as temp_dir:
        with self.model.no_bar():
            with self.model.no_logging():
                original_path = self.model.path
                self.model.path = Path(temp_dir)
                self.model.fit_one_cycle(params['epochs'], params['lr'], cbs=callbacks)

                # Load the best one and export it
                self.model = self.model.load(self.name)

                if objective_func_name == 'log_loss':
                    eval_result = self.model.validate(dl=dls.valid)[0]
                else:
                    eval_result = self.model.validate(dl=dls.valid)[1]

                logger.log(15, f'Model validation metrics: {eval_result}')
                self.model.path = original_path

    self.params_trained['best_epoch'] = save_callback.best_epoch
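# --- Hedged usage sketch (not part of the model implementation) ---
# NNFastAiTabularModel is normally trained through TabularPredictor rather than
# by calling _fit() directly. The 'FASTAI' hyperparameter key and the toy data
# below are assumptions; the inner option names ('epochs', 'bs',
# 'early.stopping.patience') come from the params used above.
import pandas as pd
from autogluon.tabular import TabularPredictor

train = pd.DataFrame({
    'f1': [0.1, 0.4, 0.9, 0.3] * 25,
    'f2': ['a', 'b', 'a', 'b'] * 25,
    'target': [0, 1, 1, 0] * 25,
})
predictor = TabularPredictor(label='target').fit(
    train,
    hyperparameters={'FASTAI': {'epochs': 5, 'bs': 32, 'early.stopping.patience': 3}},
    time_limit=600,
)
print(predictor.leaderboard(silent=True))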
def fit(self, train_data, tuning_data=None, time_limit='auto', presets=None, hyperparameters=None, **kwargs):
    """Automatic fit process for image prediction.

    Parameters
    ----------
    train_data : pd.DataFrame or str
        Training data, can be a dataframe like image dataset.
        For dataframe like datasets, `image` and `label` columns are required.
        `image`: raw image paths. `label`: categorical integer id, starting from 0.
        For more details of how to construct a dataset for image predictor, check out:
        `http://preview.d2l.ai/d8/main/image_classification/getting_started.html`.
        If a string is provided, will search for d8 built-in datasets.
    tuning_data : pd.DataFrame or str, default = None
        Another dataset containing validation data reserved for model selection and hyperparameter-tuning,
        can be a dataframe like image dataset.
        If a string is provided, will search for d8 built-in datasets.
        If `None`, the validation dataset will be randomly split from `train_data` according to `holdout_frac`.
    time_limit : int, default = 'auto' (defaults to 2 hours if no presets detected)
        Time limit in seconds, if `None`, will run until all tuning and training finished.
        If `time_limit` is hit during `fit`, the HPO process will interrupt and return the current best configuration.
    presets : list or str or dict, default = ['medium_quality_faster_train']
        List of preset configurations for various arguments in `fit()`.
        Can significantly impact predictive accuracy, memory-footprint, and inference latency of trained models,
        and various other properties of the returned `predictor`.
        It is recommended to specify presets and avoid specifying most other `fit()` arguments or model hyperparameters
        prior to becoming familiar with AutoGluon.
        As an example, to get the most accurate overall predictor (regardless of its efficiency), set `presets='best_quality'`.
        To get good quality with faster inference speed, set `presets='good_quality_faster_inference'`.
        Any user-specified arguments in `fit()` will override the values used by presets.
        If specifying a list of presets, later presets will override earlier presets if they alter the same argument.
        For precise definitions of the provided presets, see file: `autogluon/vision/configs/presets_configs.py`.
        Users can specify custom presets by passing in a dictionary of argument values as an element to the list.

        Available Presets: ['best_quality', 'high_quality_fast_inference', 'good_quality_faster_inference', 'medium_quality_faster_train']

        It is recommended to only use one `quality` based preset in a given call to `fit()` as they alter many of the
        same arguments and are not compatible with each other.

        Note that depending on your specific hardware limitations (# gpus, size of gpu memory...) your mileage may vary a lot;
        you may choose lower quality presets if necessary, and try to reduce `batch_size` if OOM
        ("RuntimeError: CUDA error: out of memory") happens frequently during the `fit`.

        In-depth Preset Info:
            # Best predictive accuracy with little consideration to inference time or model size.
            # Achieve even better results by specifying a large time_limit value.
            # Recommended for applications that benefit from the best possible model accuracy.
            best_quality={
                'hyperparameters': {
                    'model': Categorical('resnet50_v1b', 'resnet101_v1d', 'resnest200'),
                    'lr': Real(1e-5, 1e-2, log=True),
                    'batch_size': Categorical(8, 16, 32, 64, 128),
                    'epochs': 200,
                    'early_stop_patience': 50
                },
                'hyperparameter_tune_kwargs': {
                    'num_trials': 1024,
                    'searcher': 'random',
                },
                'time_limit': 12*3600,
            },

            # Good predictive accuracy with fast inference.
            # Recommended for applications that require reasonable inference speed and/or model size.
            good_quality_fast_inference={
                'hyperparameters': {
                    'model': Categorical('resnet50_v1b', 'resnet34_v1b'),
                    'lr': Real(1e-4, 1e-2, log=True),
                    'batch_size': Categorical(8, 16, 32, 64, 128),
                    'epochs': 150,
                    'early_stop_patience': 20
                },
                'hyperparameter_tune_kwargs': {
                    'num_trials': 512,
                    'searcher': 'random',
                },
                'time_limit': 8*3600,
            },

            # Medium predictive accuracy with very fast inference and very fast training time.
            # This is the default preset in AutoGluon, but should generally only be used for quick prototyping.
            medium_quality_faster_train={
                'hyperparameters': {
                    'model': 'resnet50_v1b',
                    'lr': 0.01,
                    'batch_size': 64,
                    'epochs': 50,
                    'early_stop_patience': 5
                },
                'time_limit': 1*3600,
            },

            # Medium predictive accuracy with very fast inference.
            # Compared with `medium_quality_faster_train` it uses a faster model but explores more hyperparameters.
            medium_quality_faster_inference={
                'hyperparameters': {
                    'model': Categorical('resnet18_v1b', 'mobilenetv3_small'),
                    'lr': Categorical(0.01, 0.005, 0.001),
                    'batch_size': Categorical(64, 128),
                    'epochs': Categorical(50, 100),
                    'early_stop_patience': 10
                },
                'hyperparameter_tune_kwargs': {
                    'num_trials': 32,
                    'searcher': 'random',
                },
                'time_limit': 2*3600,
            },
    hyperparameters : dict, default = None
        Extra hyperparameters for specific models.
        Accepted args include (not limited to):
        epochs : int, default value based on network
            The `epochs` for model training.
        net : mx.gluon.Block
            The custom network. If defined, the model name in config will be ignored so your
            custom network will be used for training rather than pulling it from model zoo.
        optimizer : mx.Optimizer
            The custom optimizer object. If defined, the optimizer will be ignored in config but this
            object will be used in training instead.
        batch_size : int
            Mini batch size.
        lr : float
            Trainer learning rate for optimization process.
        early_stop_patience : int, default=10
            Number of epochs with no improvement after which training is early stopped. Use `None` to disable.
        early_stop_min_delta : float, default=1e-4
            The small delta value to ignore when evaluating the metric. A large delta helps stabilize the early
            stopping strategy against tiny fluctuations, e.g. 0.5->0.49->0.48->0.499->0.500001 is still considered
            a good timing for early stopping.
        early_stop_baseline : float, default=None
            The minimum (baseline) value to trigger early stopping. For example, with `early_stop_baseline=0.5`,
            early stopping won't be triggered if the metric is less than 0.5 even if a plateau is detected.
            Use `None` to disable.
        early_stop_max_value : float, default=None
            The max value for the metric; training is stopped instantly once the max value is achieved. Use `None` to disable.
        You can get the list of accepted hyperparameters in `config.yaml` saved by this predictor.
    **kwargs :
        holdout_frac : float, default = 0.1
            The random split ratio for `tuning_data` if `tuning_data==None`.
        random_state : int, default = None
            The random_state (seed) for shuffling data, only used if `tuning_data==None`.
            Note that the `random_state` only affects the splitting process, not model training.
            If not specified (None), will leave the original random sampling intact.
        nthreads_per_trial : int, default = (# cpu cores)
            Number of CPU threads for each trial, if `None`, will detect the # cores on current instance.
        ngpus_per_trial : int, default = (# gpus)
            Number of GPUs to use for each trial, if `None`, will detect the # gpus on current instance.
        hyperparameter_tune_kwargs : dict, default = None
            num_trials : int, default = 1
                The limit of HPO trials that can be performed within `time_limit`. The HPO process will be terminated
                when `num_trials` trials have finished or wall clock `time_limit` is reached, whichever comes first.
            searcher : str, default = 'random'
                Searcher strategy for HPO, 'random' by default.
                Options include: 'random' (random search), 'bayesopt' (Gaussian process Bayesian optimization), 'grid' (grid search).
            max_reward : float, default = None
                The reward threshold for stopping criteria. If `max_reward` is reached during HPO, the scheduler
                will terminate earlier to reduce time cost.
            scheduler_options : dict, default = None
                Extra options for the HPO scheduler, please refer to :class:`autogluon.core.Searcher` for details.
    """
    if self._problem_type is None:
        # options: multiclass, binary, regression
        self._problem_type = MULTICLASS
    assert self._problem_type in (MULTICLASS, BINARY, REGRESSION), f"Invalid problem_type: {self._problem_type}"
    if self._eval_metric is None:
        if self._problem_type == REGRESSION:
            # options: rmse
            self._eval_metric = 'rmse'
            logger.log(20, 'ImagePredictor sets rmse as default eval_metric for regression problems.')
        else:
            # options: accuracy
            self._eval_metric = 'accuracy'
            logger.log(20, 'ImagePredictor sets accuracy as default eval_metric for classification problems.')

    # init/validate kwargs
    kwargs = self._validate_kwargs(kwargs)
    # unpack
    num_trials = kwargs['hyperparameter_tune_kwargs']['num_trials']
    nthreads_per_trial = kwargs['nthreads_per_trial']
    ngpus_per_trial = kwargs['ngpus_per_trial']
    holdout_frac = kwargs['holdout_frac']
    random_state = kwargs['random_state']
    scheduler = kwargs['hyperparameter_tune_kwargs']['scheduler']
    searcher = kwargs['hyperparameter_tune_kwargs']['searcher']
    max_reward = kwargs['hyperparameter_tune_kwargs']['max_reward']
    scheduler_options = kwargs['hyperparameter_tune_kwargs']['scheduler_options']
    # deep copy to avoid inplace overwrite
    train_data = copy.deepcopy(train_data)
    tuning_data = copy.deepcopy(tuning_data)

    log_level = verbosity2loglevel(self._verbosity)
    set_logger_verbosity(self._verbosity, logger=logger)
    if presets:
        if not isinstance(presets, list):
            presets = [presets]
        logger.log(20, f'Presets specified: {presets}')

    if time_limit == 'auto':
        # no presets, no user-specified time_limit
        time_limit = 7200
        logger.log(20, f'`time_limit=auto` set to `time_limit={time_limit}`.')

    use_rec = False
    if isinstance(train_data, str) and train_data == 'imagenet':
        # FIXME: imagenet does not work, crashes in validating data due to empty DataFrames.
        logger.warning('ImageNet is a huge dataset which cannot be downloaded directly, ' +
                       'please follow the data preparation tutorial in GluonCV. ' +
                       'The following record files (symlinks) will be used: \n' +
                       'rec_train : ~/.mxnet/datasets/imagenet/rec/train.rec\n' +
                       'rec_train_idx : ~/.mxnet/datasets/imagenet/rec/train.idx\n' +
                       'rec_val : ~/.mxnet/datasets/imagenet/rec/val.rec\n' +
                       'rec_val_idx : ~/.mxnet/datasets/imagenet/rec/val.idx\n')
        train_data = pd.DataFrame({'image': [], self._label_inner: []})
        tuning_data = pd.DataFrame({'image': [], self._label_inner: []})
        use_rec = True
    if isinstance(train_data, str):
        from d8.image_classification import Dataset as D8D
        names = D8D.list()
        if train_data.lower() in names:
            train_data = D8D.get(train_data)
        else:
            valid_names = '\n'.join(names)
            raise ValueError(f'`train_data` {train_data} is not among valid list {valid_names}')
        if tuning_data is None:
            train_data, tuning_data = train_data.split(1 - holdout_frac)
    if isinstance(tuning_data, str):
        from d8.image_classification import Dataset as D8D
        names = D8D.list()
        if tuning_data.lower() in names:
            tuning_data = D8D.get(tuning_data)
        else:
            valid_names = '\n'.join(names)
            raise ValueError(f'`tuning_data` {tuning_data} is not among valid list {valid_names}')

    # data sanity check
    train_data = self._validate_data(train_data)
    train_labels = _get_valid_labels(train_data)
    self._label_cleaner = LabelCleaner.construct(problem_type=self._problem_type, y=train_labels, y_uncleaned=train_labels)
    train_labels_cleaned = self._label_cleaner.transform(train_labels)
    # converting to internal label set
    _set_valid_labels(train_data, train_labels_cleaned)
    tuning_data_validated = False
    if tuning_data is None:
        train_data, tuning_data, _, _ = generate_train_test_split(
            X=train_data,
            y=train_data[self._label_inner],
            problem_type=self._problem_type,
            test_size=holdout_frac)
        logger.info('Randomly split train_data into train[%d]/validation[%d] splits.',
                    len(train_data), len(tuning_data))
        train_data = train_data.reset_index(drop=True)
        tuning_data = tuning_data.reset_index(drop=True)
        tuning_data_validated = True

    train_data = self._validate_data(train_data)
    if isinstance(train_data, self.Dataset):
        train_data = self.Dataset(train_data, classes=train_data.classes)
    if tuning_data is not None and not tuning_data_validated:
        tuning_data = self._validate_data(tuning_data)
        # converting to internal label set
        _set_valid_labels(tuning_data, self._label_cleaner.transform(_get_valid_labels(tuning_data)))
        if isinstance(tuning_data, self.Dataset):
            tuning_data = self.Dataset(tuning_data, classes=tuning_data.classes)

    if self._classifier is not None:
        logging.getLogger("ImageClassificationEstimator").propagate = True
        self._classifier._logger.setLevel(log_level)
        self._fit_summary = self._classifier.fit(train_data, tuning_data, 1 - holdout_frac, random_state, resume=False)
        if hasattr(self._classifier, 'fit_history'):
            self._fit_summary['fit_history'] = self._classifier.fit_history()
        return self

    # new HPO task
    if time_limit is not None and num_trials is None:
        num_trials = 99999
    if time_limit is None and num_trials is None:
        raise ValueError('`time_limit` and `num_trials` can not be `None` at the same time, '
                         'otherwise the training will not be terminated gracefully.')
    config = {
        'log_dir': self._log_dir,
        'num_trials': 99999 if num_trials is None else max(1, num_trials),
        'time_limits': 2147483647 if time_limit is None else max(1, time_limit),
        'searcher': searcher,
        # needed for gluon-cv TODO: remove after gluon-cv is updated https://github.com/dmlc/gluon-cv/issues/1633
        'search_strategy': searcher,
        'scheduler': scheduler,
    }
    if max_reward is not None:
        config['max_reward'] = max_reward
    if nthreads_per_trial is not None:
        config['nthreads_per_trial'] = nthreads_per_trial
    elif is_fork_enabled():
        # This is needed to address multiprocessing.context.TimeoutError in fork mode
        config['nthreads_per_trial'] = 0
    if ngpus_per_trial is not None:
        config['ngpus_per_trial'] = ngpus_per_trial
    if isinstance(hyperparameters, dict):
        if 'batch_size' in hyperparameters:
            bs = hyperparameters['batch_size']
            _check_gpu_memory_presets(bs, ngpus_per_trial, 4, 256)  # 256MB per sample
        net = hyperparameters.pop('net', None)
        if net is not None:
            config['custom_net'] = net
        optimizer = hyperparameters.pop('optimizer', None)
        if optimizer is not None:
            config['custom_optimizer'] = optimizer
        # check if hyperparameters overwrite existing config
        for k, v in hyperparameters.items():
            if k in config:
                raise ValueError(f'Overwriting {k} = {config[k]} to {v} by hyperparameters is ambiguous.')
        config.update(hyperparameters)
    if scheduler_options is not None:
        config.update(scheduler_options)
    if use_rec:
        config['use_rec'] = True
    if 'early_stop_patience' not in config:
        config['early_stop_patience'] = 10
    if config['early_stop_patience'] is None:
        config['early_stop_patience'] = -1
    # TODO(zhreshold): expose the transform function (or sign function) for converting custom metrics
    if 'early_stop_baseline' not in config or config['early_stop_baseline'] is None:
        config['early_stop_baseline'] = -np.Inf
    if 'early_stop_max_value' not in config or config['early_stop_max_value'] is None:
        config['early_stop_max_value'] = np.Inf
    # batch size cannot be larger than dataset size
    if ngpus_per_trial is not None and ngpus_per_trial > 1:
        min_value = ngpus_per_trial
    else:
        min_value = 1
    bs = sanitize_batch_size(config.get('batch_size', 16), min_value=min_value, max_value=len(train_data))
    config['batch_size'] = bs
    # verbosity
    if log_level > logging.INFO:
        logging.getLogger('gluoncv.auto.tasks.image_classification').propagate = False
        logging.getLogger("ImageClassificationEstimator").propagate = False
        logging.getLogger("ImageClassificationEstimator").setLevel(log_level)
    task = _ImageClassification(config=config, problem_type=self._problem_type)
    # GluonCV can't handle these separately - patching created config
    task.search_strategy = scheduler
    task.scheduler_options['searcher'] = searcher
    task._logger.setLevel(log_level)
    task._logger.propagate = True
    self._train_classes = train_data.classes
    with warnings.catch_warnings(record=True) as w:
        warnings.simplefilter("always")
        with MXNetErrorCatcher() as err:
            self._classifier = task.fit(train_data, tuning_data, 1 - holdout_frac, random_state)
        if err.exc_value is not None:
            raise RuntimeError(err.exc_value + err.hint)
    self._classifier._logger.setLevel(log_level)
    self._classifier._logger.propagate = True
    self._fit_summary = task.fit_summary()
    if hasattr(task, 'fit_history'):
        self._fit_summary['fit_history'] = task.fit_history()
    return self
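# --- Hedged usage sketch (not part of the predictor implementation) ---
# Minimal call into fit() as described by the docstring above; the image paths
# and label ids are hypothetical, and the preset name is taken from the
# 'Available Presets' list in the docstring.
import pandas as pd
from autogluon.vision import ImagePredictor

train_df = pd.DataFrame({
    'image': ['img/cat_001.jpg', 'img/dog_001.jpg'],  # raw image paths
    'label': [0, 1],                                   # categorical integer ids, starting from 0
})
predictor = ImagePredictor()
predictor.fit(
    train_df,
    presets='good_quality_faster_inference',  # use a single quality preset per fit() call
    time_limit=2 * 3600,                      # seconds; time_limit='auto' defaults to 2 hours
    hyperparameters={'batch_size': 32, 'early_stop_patience': 10},
)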
def _fit(self, X, y, X_val=None, y_val=None, time_limit=None, num_cpus=None, num_gpus=0, sample_weight=None, **kwargs):
    try_import_fastai_v1()
    import torch
    from fastai.layers import LabelSmoothingCrossEntropy
    from fastai.tabular import tabular_learner
    from fastai.utils.mod_display import progress_disabled_ctx
    from fastai.core import defaults
    from .callbacks import EarlyStoppingCallbackWithTimeLimit, SaveModelCallback

    start_time = time.time()
    if sample_weight is not None:  # TODO: support
        logger.log(15, "sample_weight not yet supported for NNFastAiTabularModel, this model will ignore them in training.")

    params = self.params.copy()

    self.y_scaler = params.get('y_scaler', None)
    if self.y_scaler is not None:
        self.y_scaler = copy.deepcopy(self.y_scaler)

    if num_cpus is None:
        num_cpus = defaults.cpus
    # Additional workers help only when fork is enabled; in other mp modes, communication overhead reduces performance
    num_workers = int(num_cpus / 2)
    if not is_fork_enabled():
        num_workers = 0
    if num_gpus is not None:
        if num_gpus == 0:
            # TODO: Does not obviously impact inference speed
            defaults.device = torch.device('cpu')
        else:
            defaults.device = torch.device('cuda')

    logger.log(15, f'Fitting Neural Network with parameters {params}...')
    data = self._preprocess_train(X, y, X_val, y_val, num_workers=num_workers)

    nn_metric, objective_func_name = self.__get_objective_func_name()
    objective_func_name_to_monitor = self.__get_objective_func_to_monitor(objective_func_name)
    objective_optim_mode = 'min' if objective_func_name in [
        'root_mean_squared_error', 'mean_squared_error', 'mean_absolute_error', 'r2'  # Regression objectives
    ] else 'auto'

    # TODO: calculate max emb concat layer size and use 1st layer as that value and 2nd in between number of classes and the value
    if params.get('layers', None) is not None:
        layers = params['layers']
    elif self.problem_type in [REGRESSION, BINARY]:
        layers = [200, 100]
    else:
        base_size = max(len(data.classes) * 2, 100)
        layers = [base_size * 2, base_size]

    loss_func = None
    if self.problem_type in [BINARY, MULTICLASS] and params.get('smoothing', 0.0) > 0.0:
        loss_func = LabelSmoothingCrossEntropy(params['smoothing'])

    ps = params['ps']
    if not isinstance(ps, list):
        ps = [ps]

    if time_limit:
        time_elapsed = time.time() - start_time
        time_left = time_limit - time_elapsed
    else:
        time_left = None

    best_epoch_stop = params.get("best_epoch", None)  # Use best epoch for refit_full.

    early_stopping_fn = partial(
        EarlyStoppingCallbackWithTimeLimit,
        monitor=objective_func_name_to_monitor,
        mode=objective_optim_mode,
        min_delta=params['early.stopping.min_delta'],
        patience=params['early.stopping.patience'],
        time_limit=time_left,
        best_epoch_stop=best_epoch_stop)

    self.model = tabular_learner(
        data, layers=layers, ps=ps, emb_drop=params['emb_drop'], metrics=nn_metric,
        loss_func=loss_func, callback_fns=[early_stopping_fn])
    logger.log(15, self.model.model)

    with make_temp_directory() as temp_dir:
        save_callback = SaveModelCallback(
            self.model,
            monitor=objective_func_name_to_monitor,
            mode=objective_optim_mode,
            name=self.name,
            best_epoch_stop=best_epoch_stop)
        with progress_disabled_ctx(self.model) as model:
            original_path = model.path
            model.path = Path(temp_dir)
            model.fit_one_cycle(params['epochs'], params['lr'], callbacks=save_callback)

            # Load the best one and export it
            model.load(self.name)

            if objective_func_name == 'log_loss':
                eval_result = model.validate()[0]
            else:
                eval_result = model.validate()[1].numpy().reshape(-1)[0]

            logger.log(15, f'Model validation metrics: {eval_result}')
            model.path = original_path

        self.params_trained['best_epoch'] = save_callback.best_epoch