# Assumed module-level imports for these trainer methods:
#   import optuna
#   import numpy as np
#   from os import path
#   from stable_baselines.common.vec_env import DummyVecEnv
#   from lib.env.BitcoinTradingEnv import BitcoinTradingEnv  # project-local path assumed

def optimize_params(self, trial, n_prune_evals_per_trial: int = 4,
                    n_tests_per_eval: int = 1, speedup_factor: int = 10):
    env_params = self.optimize_env_params(trial)

    # NOTE: despite its name, `test_set_percentage` is used throughout this
    # class as the *training* split fraction (see `train()` and `test()`).
    full_train_len = int(self.test_set_percentage * len(self.feature_df))
    optimize_train_len = int(self.validation_set_percentage * full_train_len)

    # Train on a `speedup_factor`-times shorter slice to keep trials cheap.
    train_len = int(optimize_train_len / speedup_factor)
    train_start = optimize_train_len - train_len

    train_df = self.feature_df[train_start:optimize_train_len]
    # Validate on the remainder of the training split only; slicing up to
    # `full_train_len` keeps the held-out test set out of optimization.
    validation_df = self.feature_df[optimize_train_len:full_train_len]

    train_env = DummyVecEnv(
        [lambda: BitcoinTradingEnv(train_df, **env_params)])
    validation_env = DummyVecEnv(
        [lambda: BitcoinTradingEnv(validation_df, **env_params)])

    model_params = self.optimize_agent_params(trial)
    model = self.model(self.policy,
                       train_env,
                       verbose=self.model_verbose,
                       nminibatches=self.nminibatches,
                       tensorboard_log=self.tensorboard_path,
                       **model_params)

    last_reward = -np.finfo(np.float16).max
    evaluation_interval = int(train_len / n_prune_evals_per_trial)

    for eval_idx in range(n_prune_evals_per_trial):
        model.learn(evaluation_interval)

        rewards = []
        n_episodes, reward_sum = 0, 0.0

        obs = validation_env.reset()
        while n_episodes < n_tests_per_eval:
            action, _ = model.predict(obs)
            obs, reward, done, _ = validation_env.step(action)
            # DummyVecEnv returns batch-of-1 arrays; unwrap them.
            reward_sum += reward[0]

            if done[0]:
                rewards.append(reward_sum)
                reward_sum = 0.0
                n_episodes += 1
                obs = validation_env.reset()

        last_reward = np.mean(rewards)
        # The study minimizes, so report the negated mean validation reward.
        trial.report(-1 * last_reward, eval_idx)

        if trial.should_prune(eval_idx):
            raise optuna.structs.TrialPruned()

    return -1 * last_reward
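# For context, a minimal sketch of how this objective might be driven. The
# method name `optimize` and the default `n_trials` are assumptions, not part
# of the original code; `self.optimize_params` and `self.initialize_optuna`
# match the definitions in this file.
def optimize(self, n_trials: int = 100):
    self.initialize_optuna()
    # Each trial samples env/agent params, trains briefly, and reports the
    # negated validation reward, which the (minimizing) study optimizes.
    self.optuna_study.optimize(self.optimize_params, n_trials=n_trials)
    return self.optuna_study.best_trial.params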
def test(self, model_epoch: int = 0, should_render: bool = True):
    env_params = self.get_env_params()

    train_len = int(self.test_set_percentage * len(self.feature_df))
    test_df = self.feature_df[train_len:]

    test_env = DummyVecEnv(
        [lambda: BitcoinTradingEnv(test_df, **env_params)])

    model_path = path.join('data', 'agents',
                           f'{self.study_name}__{model_epoch}.pkl')
    model = self.model.load(model_path, env=test_env)

    self.logger.info(f'Testing model ({self.study_name}__{model_epoch})')

    obs, done, reward_sum = test_env.reset(), False, 0.0
    while not done:
        action, _states = model.predict(obs)
        obs, reward, done, _ = test_env.step(action)
        # DummyVecEnv returns batch-of-1 arrays; unwrap them so the
        # cumulative reward stays a plain float.
        reward_sum += reward[0]
        done = done[0]

        if should_render:
            test_env.render(mode='human')

    self.logger.info(
        f'Finished testing model ({self.study_name}__{model_epoch}): '
        f'${reward_sum:.2f}')
def train(self, n_epochs: int = 1, iters_per_epoch: int = 1,
          test_trained_model: bool = False,
          render_trained_model: bool = False):
    self.initialize_optuna()

    env_params = self.get_env_params()

    train_len = int(self.test_set_percentage * len(self.feature_df))
    train_df = self.feature_df[:train_len]

    train_env = DummyVecEnv(
        [lambda: BitcoinTradingEnv(train_df, **env_params)])

    model_params = self.get_model_params()
    model = self.model(self.policy,
                       train_env,
                       verbose=self.model_verbose,
                       nminibatches=self.nminibatches,
                       tensorboard_log=self.tensorboard_path,
                       **model_params)

    self.logger.info(f'Training for {n_epochs} epochs')

    # One "epoch" is `iters_per_epoch` full passes over the training frame.
    n_timesteps = len(train_df) * iters_per_epoch

    for model_epoch in range(n_epochs):
        self.logger.info(
            f'[{model_epoch}] Training for: {n_timesteps} time steps')

        model.learn(total_timesteps=n_timesteps)

        model_path = path.join('data', 'agents',
                               f'{self.study_name}__{model_epoch}.pkl')
        model.save(model_path)

        if test_trained_model:
            self.test(model_epoch, should_render=render_trained_model)

    self.logger.info(f'Trained {n_epochs} models')
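# A hypothetical end-to-end run (module-level, outside the trainer class).
# `RLTrader` and its no-argument constructor are assumptions for
# illustration; the method names match the definitions above (including the
# `optimize` sketch after `optimize_params`).
if __name__ == '__main__':
    trainer = RLTrader()                # assumed class name and constructor
    trainer.optimize(n_trials=20)       # tune env/agent hyperparameters
    trainer.train(n_epochs=10,
                  test_trained_model=True,
                  render_trained_model=False)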
def initialize_optuna(self):
    try:
        train_env = DummyVecEnv(
            [lambda: BitcoinTradingEnv(self.feature_df)])
        model = self.model(self.policy, train_env, nminibatches=1)
        self.study_name = (f'{model.__class__.__name__}'
                           f'__{model.act_model.__class__.__name__}'
                           f'__{self.reward_strategy}')
    except Exception:
        self.study_name = f'UnknownModel__UnknownPolicy__{self.reward_strategy}'

    self.optuna_study = optuna.create_study(study_name=self.study_name,
                                            storage=self.params_db_path,
                                            load_if_exists=True)

    self.logger.debug('Initialized Optuna:')

    try:
        self.logger.debug(
            f'Best reward in ({len(self.optuna_study.trials)}) trials: '
            f'{self.optuna_study.best_value}')
    except Exception:
        self.logger.debug('No trials have been finished yet.')
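# Variant: the `trial.report`/`should_prune` calls in `optimize_params`
# depend on the study's pruner (Optuna falls back to MedianPruner when none
# is given). A sketch with an explicit sampler and pruner; the method name
# and the TPESampler/MedianPruner settings are illustrative assumptions, and
# `self.study_name` is assumed to be set already.
def initialize_optuna_with_pruner(self):
    self.optuna_study = optuna.create_study(
        study_name=self.study_name,
        storage=self.params_db_path,
        load_if_exists=True,
        sampler=optuna.samplers.TPESampler(),
        pruner=optuna.pruners.MedianPruner(n_startup_trials=5,
                                           n_warmup_steps=1))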
import optuna
import pandas as pd

from stable_baselines.common.vec_env import DummyVecEnv

from lib.env.BitcoinTradingEnv import BitcoinTradingEnv  # project-local path assumed
from lib.util.indicators import add_indicators

df = pd.read_csv('./data/coinbase_hourly.csv')
df = df.drop(['Symbol'], axis=1)
df = df.sort_values(['Date'])
df = add_indicators(df.reset_index())

test_len = int(len(df) * 0.2)
train_len = len(df) - test_len
test_df = df[train_len:]

# Every test env uses "profit" as its reward so agents trained against
# different reward strategies can be compared on the same metric; only the
# optimized env hyperparameters differ per study.
profit_study = optuna.load_study(study_name='ppo2_profit',
                                 storage='sqlite:///params.db')
profit_env = DummyVecEnv([lambda: BitcoinTradingEnv(
    test_df,
    reward_func="profit",
    forecast_steps=int(profit_study.best_trial.params['forecast_steps']),
    forecast_alpha=profit_study.best_trial.params['forecast_alpha'])])

sortino_study = optuna.load_study(study_name='ppo2_sortino',
                                  storage='sqlite:///params.db')
sortino_env = DummyVecEnv([lambda: BitcoinTradingEnv(
    test_df,
    reward_func="profit",
    forecast_steps=int(sortino_study.best_trial.params['forecast_steps']),
    forecast_alpha=sortino_study.best_trial.params['forecast_alpha'])])

# calmar_study = optuna.load_study(study_name='ppo2_calmar',
#                                  storage='sqlite:///params.db')
# calmar_env = DummyVecEnv([lambda: BitcoinTradingEnv(
#     test_df,
#     reward_func="profit",
#     forecast_steps=int(calmar_study.best_trial.params['forecast_steps']),
#     forecast_alpha=calmar_study.best_trial.params['forecast_alpha'])])

omega_study = optuna.load_study(study_name='ppo2_omega',
                                storage='sqlite:///params.db')
omega_env = DummyVecEnv([lambda: BitcoinTradingEnv(
    test_df,
    reward_func="profit",
    forecast_steps=int(omega_study.best_trial.params['forecast_steps']),
    forecast_alpha=omega_study.best_trial.params['forecast_alpha'])])
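# A sketch of how the loaded studies might be used to benchmark the saved
# agents against each other. The epoch number and the assumption that
# checkpoints were saved under these study names are illustrative; the
# `data/agents/{study_name}__{epoch}.pkl` convention matches the trainer's
# `train()`/`test()` methods above.
from os import path

from stable_baselines import PPO2


def run_agent(study_name, env, model_epoch=0):
    """Roll one episode on `env` and return the cumulative (profit) reward."""
    model = PPO2.load(path.join('data', 'agents',
                                f'{study_name}__{model_epoch}.pkl'), env=env)

    obs, done, reward_sum = env.reset(), False, 0.0
    while not done:
        action, _ = model.predict(obs)
        obs, reward, done, _ = env.step(action)
        reward_sum += reward[0]  # unwrap DummyVecEnv's batch-of-1 arrays
        done = done[0]

    return reward_sum


for name, env in [('ppo2_profit', profit_env),
                  ('ppo2_sortino', sortino_env),
                  ('ppo2_omega', omega_env)]:
    print(f'{name}: ${run_agent(name, env):.2f}')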