import argparse
import json

import numpy as np

# CONFIG_FILE, VERSION, RS, K, imp and TASK are module-level constants, and
# load_data, get_logger and Trainer are project helpers defined elsewhere.


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--config", default="./configs/" + CONFIG_FILE)
    option = parser.parse_args()
    with open(option.config) as f:
        config = json.load(f)

    model_name = config["model"]
    features = config["features"]
    target_name = config["target_name"]
    params = config["model_params"]

    # lineNotify("learning start")

    X, y, test = load_data(features, target_name)

    # Log the run setup before training.
    get_logger(VERSION).info(test.shape)
    get_logger(VERSION).info(option.config)
    get_logger(VERSION).info(params)
    get_logger(VERSION).info("====== CV score ======")

    trainer = Trainer(model_name, params, features)
    val_score, val_log = trainer.cross_validation(
        X, y, test, random_state=RS, n_split=K, importance_f=imp, task=TASK)
    get_logger(VERSION).info(np.mean(val_score["lgb"]))
    get_logger(VERSION).info(val_log)

    # Prediction
    preds = trainer.predict()

    # Submit
    trainer.create_submit(preds, val_score)
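# For reference, a minimal sketch of the JSON config this entry point expects,
# reconstructed from the keys main() reads. This is an assumption, not a file
# from the repo, and all values are placeholders:
#
#   {
#       "model": "lgb",
#       "features": ["feature_a", "feature_b"],
#       "target_name": "target",
#       "model_params": {"learning_rate": 0.1, "num_leaves": 31}
#   }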
import os

import hydra
from omegaconf import DictConfig
from comet_ml import Experiment
from sklearn.model_selection import KFold

# seed_everything, unpickle, to_pickle, load_data, preprocessing, MAE,
# LGBMModel, CatBoostModel and Trainer are project helpers defined elsewhere.


def main(cfg: DictConfig):
    print('Nishika Second-hand Apartment Price Training')
    cur_dir = hydra.utils.get_original_cwd()
    os.chdir(cur_dir)
    data_dir = './input'
    seed_everything(cfg.data.seed)

    experiment = Experiment(api_key=cfg.exp.api_key,
                            project_name=cfg.exp.project_name,
                            auto_output_logging='simple',
                            auto_metric_logging=False)
    experiment.log_parameters(dict(cfg.data))

    # Config ####################################################################################
    del_tar_col = ['取引時点']
    id_col = 'ID'
    tar_col = '取引価格(総額)_log'
    g_col = 'year'
    criterion = MAE
    cv = KFold(n_splits=cfg.data.n_splits, shuffle=True, random_state=cfg.data.seed)
    # cv = GroupKFold(n_splits=5)

    # Load Data ####################################################################################
    if cfg.exp.use_pickle:
        # Load the preprocessed data from pickle
        df = unpickle('./input/data.pkl')
    else:
        df = load_data(data_dir, sampling=cfg.data.sampling, seed=cfg.data.seed,
                       id_col=id_col, target_col=tar_col)
        # Preprocessing
        print('Preprocessing')
        df = preprocessing(df, cfg)
        # Save in pickle format for later runs
        to_pickle('./input/data.pkl', df)
        try:
            experiment.log_asset(file_data='./input/data.pkl', file_name='data.pkl')
        except Exception:
            pass

    features = [c for c in df.columns if c not in del_tar_col]

    # Model ####################################################################################
    if cfg.exp.model == 'lgb':
        model = LGBMModel(dict(cfg.lgb))
    elif cfg.exp.model == 'cat':
        model = CatBoostModel(dict(cfg.cat))
    else:
        # Fail fast instead of passing model=None into the Trainer.
        raise ValueError(f'Model {cfg.exp.model} not defined.')

    # Train & Predict ##############################################################################
    trainer = Trainer(model, id_col, tar_col, g_col, features, cv, criterion, experiment)
    trainer.fit(df)
    trainer.predict(df)
    trainer.get_feature_importance()
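# Entry-point wiring sketch (an assumption — the decorator is not part of this
# excerpt). main() calls hydra.utils.get_original_cwd(), so it must be launched
# through @hydra.main with a config tree providing the groups read above:
# cfg.data (seed, n_splits, sampling), cfg.exp (api_key, project_name,
# use_pickle, model) and the cfg.lgb / cfg.cat parameter blocks.
#
# @hydra.main(config_path='config', config_name='config')  # hypothetical paths
# def main(cfg: DictConfig):
#     ...
#
# if __name__ == '__main__':
#     main()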
import os

import torch
from ray import tune

# get_dataloader, DENSE, LSTM, BaseModule and Trainer are project modules
# defined elsewhere in the repository.


class Emulator(tune.Trainable):
    # _setup/_train/_stop/_save/_restore is the legacy Ray Tune class-based
    # Trainable API.
    def _setup(self, config):
        self.config = config
        self.hc_config = config['hc_config']
        self.is_tune = self.hc_config['is_tune']

        activation = torch.nn.ReLU()

        train_loader = get_dataloader(
            self.hc_config,
            partition_set='train',
            is_tune=self.is_tune,
            small_aoi=self.hc_config['small_aoi'],
            fold=-1,
            batch_size=self.hc_config['batch_size'],
            shuffle=True,
            drop_last=True,
            num_workers=self.hc_config['num_workers'],
            pin_memory=self.hc_config['pin_memory'])

        eval_loader = get_dataloader(
            self.hc_config,
            partition_set='eval',
            is_tune=self.is_tune,
            small_aoi=self.hc_config['small_aoi'],
            fold=-1,
            batch_size=self.hc_config['batch_size'],
            shuffle=True,
            drop_last=False,
            num_workers=self.hc_config['num_workers'],
            pin_memory=self.hc_config['pin_memory'])

        # Feed-forward model for non-temporal data, LSTM otherwise.
        if not self.hc_config['is_temporal']:
            model = DENSE(
                input_size=train_loader.dataset.num_dynamic + train_loader.dataset.num_static,
                hidden_size=config['dense_hidden_size'],
                num_layers=config['dense_num_layers'],
                activation=activation,
                dropout_in=config['dropout_in'],
                dropout_linear=config['dropout_linear'])
        else:
            model = LSTM(
                num_dynamic=train_loader.dataset.num_dynamic,
                num_static=train_loader.dataset.num_static,
                lstm_hidden_size=config['lstm_hidden_size'],
                lstm_num_layers=config['lstm_num_layers'],
                dense_hidden_size=config['dense_hidden_size'],
                dense_num_layers=config['dense_num_layers'],
                output_size=1,
                dropout_in=config['dropout_in'],
                dropout_lstm=config['dropout_lstm'],
                dropout_linear=config['dropout_linear'],
                dense_activation=activation)

        if not isinstance(model, BaseModule):
            raise ValueError(
                'The model is not a subclass of models.modules:BaseModule')

        if self.hc_config['optimizer'] == 'Adam':
            # Note: the 'Adam' setting actually constructs AdamW
            # (Adam with decoupled weight decay).
            optimizer = torch.optim.AdamW(model.parameters(),
                                          config['learning_rate'],
                                          weight_decay=config['weight_decay'])
        else:
            raise ValueError(
                f'Optimizer {self.hc_config["optimizer"]} not defined.')

        if self.hc_config['loss_fn'] == 'MSE':
            loss_fn = torch.nn.MSELoss()
        else:
            raise ValueError(
                f'Loss function {self.hc_config["loss_fn"]} not defined.')

        self.trainer = Trainer(
            train_loader=train_loader,
            eval_loader=eval_loader,
            model=model,
            optimizer=optimizer,
            loss_fn=loss_fn,
            train_seq_length=self.hc_config['time']['train_seq_length'],
            train_sample_size=self.hc_config['train_sample_size'])

    def _train(self):
        train_stats = self.trainer.train_epoch()
        eval_stats = self.trainer.eval_epoch()
        stats = {**train_stats, **eval_stats}
        # Disable early stopping before the grace period is reached.
        if stats['epoch'] < self.hc_config['grace_period']:
            stats['patience_counter'] = -1
        return stats

    def _stop(self):
        if not self.is_tune:
            self._save(os.path.dirname(os.path.dirname(self.logdir)))

    def _predict(self, prediction_dir, predict_training_set=False):
        self.trainer.predict(prediction_dir, predict_training_set)

    def _save(self, path):
        path = os.path.join(path, 'model.pth')
        return self.trainer.save(path)

    def _restore(self, path):
        self.trainer.restore(path)
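# Launch sketch (an assumption, not shown in this excerpt). With the legacy
# class-based API, trials are driven by tune.run: 'hc_config' is passed
# through the search space unchanged while the per-trial hyperparameters are
# sampled. hc_config and the search space below are illustrative only.
analysis = tune.run(
    Emulator,
    config={
        'hc_config': hc_config,  # hypothetical dict of fixed project settings
        'learning_rate': tune.loguniform(1e-4, 1e-2),
        'weight_decay': tune.loguniform(1e-6, 1e-3),
        'dense_hidden_size': tune.choice([64, 128, 256]),
        'dense_num_layers': tune.choice([1, 2, 3]),
        'dropout_in': tune.uniform(0.0, 0.5),
        'dropout_linear': tune.uniform(0.0, 0.5),
    },
    num_samples=20)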