import argparse

import ray
from tensorboard import program

# Assumed import path for the Coordinator class (matches the other scripts
# in this repo, which import it from a local `coordinator` module).
from coordinator import Coordinator


def main():
    # Argument parser to specify hyperparameters.
    parser = argparse.ArgumentParser()
    parser.add_argument("--train", help="training run", action="store_true")
    parser.add_argument("--test", help="test run", action="store_true")
    parser.add_argument("--network_type", default="mlp",
                        help="""type of the policy network ["mlp", "gru"]""", type=str)
    parser.add_argument("--num_agents", default=8,
                        help="number of environments and agents running in parallel", type=int)
    parser.add_argument("--num_steps", default=32,
                        help="number of steps on each environment for every update", type=int)
    parser.add_argument("--environment", default="LunarLanderContinuous-v2",
                        help="gym environment type", type=str)
    args = parser.parse_args()

    # Launch TensorBoard programmatically, logging to ./logs.
    tb = program.TensorBoard()
    tb.configure(argv=[None, '--logdir', './logs'])
    tb.launch()

    # Initialize Ray with explicit heap and object-store memory limits (in bytes).
    ray.init(memory=1024 * 512 * 200, object_store_memory=1024 * 1024 * 1000)

    if args.train:
        # Start a training run with the given hyperparameters.
        coord = Coordinator(num_agents=args.num_agents, network=args.network_type,
                            env_name=args.environment, num_steps=args.num_steps)
        coord.train()

    if args.test:
        # Start a test run from the latest model checkpoint.
        from test import test_run
        test_run(network=args.network_type, environment=args.environment)

    ray.shutdown()


if __name__ == "__main__":
    main()
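# Example invocations (a sketch; the script filename "main.py" is an
# assumption, but the flag names come from the parser above):
#
#   python main.py --train --network_type gru --num_agents 8 --num_steps 32
#   python main.py --test --environment LunarLanderContinuous-v2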
import os
import sys

current_path = os.getcwd()
sys.path.append(current_path)

from hypertune import start_commander, start_workers
from coordinator import Coordinator
from config import mode, number_workers, tuned_config, fix_random_seed
import numpy as np

if fix_random_seed:
    np.random.seed(123)

if mode == 'parallel':
    # Hyperparameter search: one commander process coordinating several workers.
    start_commander()
    workers = start_workers(number_workers)
else:
    # Single training run with the tuned configuration.
    model = Coordinator(tuned_config, '2900')
    #################### to do ####################
    # model = Coordinator(tuned_config, '-5.28')
    # model.restore_price_predictor('-5.28-80000-')
    ###############################################
    model.train('single', True)
    model.back_test('test', 2500, True)
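# Note on reproducibility: the script above seeds only NumPy. A fuller
# sketch (assuming Python's built-in `random` module is also used somewhere;
# any deep-learning backend would additionally need its own seed) might be:
#
#   import random
#   random.seed(123)
#   np.random.seed(123)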
# Assumes `agent`, `df_train`, `window_length`, `total_training_step` and
# `replay_period` are defined earlier in the script, and that `PortfolioEnv`
# is imported from the project's environment module.
import numpy as np

from coordinator import Coordinator

coo = Coordinator(agent)

# Sanity check: print action values for a few steps before training.
env = PortfolioEnv(df_train, steps=256, trading_cost=0.00007,
                   window_length=window_length, scale=False, random_reset=False)
ob = env.reset()
for i in range(5):
    print(coo.action_values(ob))
    ob, a, r, ob_ = env.step(np.ones(5))

coo.train(env, total_training_step=total_training_step,
          replay_period=replay_period, tensorboard=True)

# Evaluation on a longer, cost-free episode.
env_test = PortfolioEnv(df_train, steps=2500, trading_cost=0.0,
                        window_length=window_length, scale=False, random_reset=False)
ob = env_test.reset()
for i in range(10):
    print(coo.action_values(ob))
    print(np.argmax(coo.action_values(ob)))
    ob, a, r, ob_ = env_test.step(np.ones(5))  # was env.step(); steps the test env

# coo.restore('')
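# Minimal sketch of a greedy evaluation loop, assuming the action is a
# weight vector over 5 assets (as suggested by env.step(np.ones(5))) and
# that placing all weight on the asset with the highest action value is the
# intended greedy policy:
#
#   ob = env_test.reset()
#   for i in range(10):
#       greedy = np.zeros(5)
#       greedy[np.argmax(coo.action_values(ob))] = 1.0
#       ob, a, r, ob_ = env_test.step(greedy)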
import os
import sys

current_path = os.getcwd()
sys.path.append(current_path)

from coordinator import Coordinator
from config import tuned_config

# 'name' is a placeholder for the run name; 'name-step' for a saved checkpoint.
model = Coordinator(tuned_config, 'name')
model.train()
model.restore('name-step')
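# Sketch of evaluating a restored checkpoint instead of training from
# scratch (back_test signature borrowed from the driver script above;
# 'name' and 'name-step' remain placeholders):
#
#   model = Coordinator(tuned_config, 'name')
#   model.restore('name-step')
#   model.back_test('test', 2500, True)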