def demo3_custom_env_fin_rl():
    """DEMO 3: Custom Continuous action env: FinanceStock-v1"""
    import elegantrl.agent as agent

    args = Arguments(if_on_policy=True)
    args.agent = agent.AgentGaePPO()  # PPO+GAE (on-policy)

    from elegantrl.env import FinanceMultiStockEnv  # a standard env for ElegantRL, no need for PreprocessEnv()
    args.env = FinanceMultiStockEnv(if_train=True, train_beg=0, train_len=1024)
    args.env_eval = FinanceMultiStockEnv(if_train=False, train_beg=0, train_len=1024)  # eva_len = 1699 - train_len
    args.reward_scale = 2 ** 0  # RewardRange: 0 < 1.0 < 1.25 <
    args.break_step = int(5e6)
    args.net_dim = 2 ** 8
    args.max_step = args.env.max_step
    args.max_memo = (args.max_step - 1) * 8
    args.batch_size = 2 ** 11
    args.repeat_times = 2 ** 4
    args.eval_times1 = 2 ** 2
    args.eval_times2 = 2 ** 4
    args.if_break_early = False
    "TotalStep: 2e5, TargetReward: 1.25, UsedTime: 200s"
    "TotalStep: 4e5, TargetReward: 1.50, UsedTime: 400s"

    # train_and_evaluate(args)
    args.rollout_num = 8
    train_and_evaluate__multiprocessing(args)
def demo3_custom_env_fin_rl():
    from elegantrl.agent import AgentGaePPO

    '''choose a DRL algorithm'''
    args = Arguments(if_on_policy=True)
    args.agent = AgentGaePPO()  # PPO+GAE (on-policy)

    from elegantrl.env import FinanceMultiStockEnv  # a standard env for ElegantRL, no need for PreprocessEnv()
    args.env = FinanceMultiStockEnv(if_train=True, train_beg=0, train_len=1024)
    args.env_eval = FinanceMultiStockEnv(if_train=False, train_beg=0, train_len=1024)  # eva_len = 1699 - train_len
    args.reward_scale = 2 ** 0  # RewardRange: 0 < 1.0 < 1.25 <
    args.break_step = int(5e6)
    args.net_dim = 2 ** 8
    args.max_step = args.env.max_step
    args.max_memo = (args.max_step - 1) * 8
    args.batch_size = 2 ** 11
    args.repeat_times = 2 ** 4
    args.eval_times1 = 2 ** 2
    args.eval_times2 = 2 ** 4
    args.if_allow_break = False
    "TotalStep:  2e5, TargetReward: 1.25, UsedTime:  200s"
    "TotalStep:  4e5, TargetReward: 1.50, UsedTime:  400s"
    "TotalStep: 10e5, TargetReward: 1.62, UsedTime: 1000s"

    '''train and evaluate'''
    # train_and_evaluate(args)
    args.rollout_num = 8
    train_and_evaluate__multiprocessing(args)
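# Both variants of demo3_custom_env_fin_rl() above pass FinanceMultiStockEnv
# to args.env directly, without PreprocessEnv(). A minimal sketch of the
# interface such a custom env is assumed to expose; the attribute names are
# inferred from the usage above and are an assumption, not the library's
# official contract:
import numpy as np

class CustomEnvSketch:
    """A stand-in continuous-action env of the shape the demos could train on."""

    def __init__(self):
        self.env_name = 'CustomEnvSketch-v0'
        self.state_dim = 16        # length of the observation vector
        self.action_dim = 4        # length of the continuous action vector
        self.max_step = 1024       # steps per episode, read as args.env.max_step above
        self.if_discrete = False   # continuous action space
        self.target_reward = 1.25  # evaluator's stopping threshold
        self.step_i = 0

    def reset(self):
        self.step_i = 0
        return np.zeros(self.state_dim, dtype=np.float32)

    def step(self, action):
        self.step_i += 1
        state = np.zeros(self.state_dim, dtype=np.float32)
        reward = float(np.tanh(action).mean())  # placeholder reward
        done = self.step_i >= self.max_step
        return state, reward, done, dict()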
def run__demo():
    import gym
    gym.logger.set_level(40)  # Block warning: 'WARN: Box bound precision lowered by casting to float32'
    from elegantrl.env import decorate_env, FinanceMultiStockEnv

    args = Arguments(agent_rl=None, env=None, gpu_id=None)  # see Arguments() for the hyper-parameters

    '''DEMO 1: Discrete action env: CartPole-v0 of gym'''
    from elegantrl.agent import AgentDoubleDQN
    args.agent_rl = AgentDoubleDQN  # choose a DRL algorithm

    args.env = decorate_env(env=gym.make('CartPole-v0'))
    args.net_dim = 2 ** 7  # change a default hyper-parameter
    # args.env = decorate_env(env=gym.make('LunarLander-v2'))
    # args.net_dim = 2 ** 8  # change a default hyper-parameter

    train_and_evaluate(args)
    exit()

    '''DEMO 2: Continuous action env: gym.Box2D'''
    from elegantrl.agent import AgentSAC  # AgentTD3
    args.agent_rl = AgentSAC  # off-policy

    env = gym.make('Pendulum-v0')
    env.target_reward = -200  # set target_reward manually for env 'Pendulum-v0'
    args.env = decorate_env(env=env)
    # args.env = decorate_env(env=gym.make('LunarLanderContinuous-v2'))
    # args.env = decorate_env(env=gym.make('BipedalWalker-v3'))  # recommend

    args.gamma = 0.95
    train_and_evaluate(args)
    exit()

    args = Arguments(if_on_policy=True)  # on-policy has different hyper-parameters from off-policy
    from elegantrl.agent import AgentGaePPO  # AgentPPO
    args.agent_rl = AgentGaePPO  # on-policy

    env = gym.make('Pendulum-v0')
    env.target_reward = -200  # set target_reward manually for env 'Pendulum-v0'
    args.env = decorate_env(env=env)
    # args.env = decorate_env(env=gym.make('LunarLanderContinuous-v2'))
    # args.env = decorate_env(env=gym.make('BipedalWalker-v3'))  # recommend

    args.gamma = 0.95
    train_and_evaluate(args)
    exit()

    '''DEMO 3: Custom Continuous action env: FinanceStock-v1'''
    args = Arguments(if_on_policy=True)
    from elegantrl.agent import AgentGaePPO  # AgentPPO
    args.agent_rl = AgentGaePPO  # PPO+GAE (on-policy)

    args.env = FinanceMultiStockEnv()  # a standard env for ElegantRL, no need for decorate_env()
    args.break_step = int(5e6 * 4)  # 5e6 (15e6) UsedTime 3,000s (9,000s)
    args.net_dim = 2 ** 8
    args.max_step = 1699
    args.max_memo = (args.max_step - 1) * 16
    args.batch_size = 2 ** 11
    args.repeat_times = 2 ** 4
    train_and_evaluate(args)
    exit()
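# Each demo block inside run__demo() above ends with exit(), so a single run
# only executes the first active demo. A hypothetical selector for running one
# demo from the command line; the registry below is illustrative and not part
# of ElegantRL:
import sys

def choose_demo():
    """Usage sketch: `python run.py demo3` or `python run.py all`."""
    demos = {
        'demo3': demo3_custom_env_fin_rl,
        'all': run__demo,
    }
    name = sys.argv[1] if len(sys.argv) > 1 else 'all'
    demos[name]()  # an unknown name raises KeyError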
def run__demo():
    import elegantrl.BetaWarning.agent as agent
    from elegantrl.BetaWarning.env import prep_env
    # from elegantrl.main import Arguments, train_and_evaluate, train_and_evaluate__multiprocessing
    import gym
    import torch  # needed by draw_cumulative_return() below
    gym.logger.set_level(40)  # Block warning: 'WARN: Box bound precision lowered by casting to float32'

    """DEMO 1: Discrete action env: CartPole-v0 of gym"""
    args = Arguments(agent_rl=None, env=None, gpu_id=None)  # see Arguments() for the hyper-parameters
    args.agent_rl = agent.AgentD3QN  # choose a DRL algorithm
    args.env = prep_env(env=gym.make('CartPole-v0'))
    args.net_dim = 2**7  # change a default hyper-parameter
    # args.env = prep_env(env=gym.make('LunarLander-v2'))
    # args.net_dim = 2**8  # change a default hyper-parameter
    train_and_evaluate(args)

    """DEMO 2: Continuous action env: gym.Box2D"""
    if_on_policy = False
    args = Arguments(if_on_policy=if_on_policy)  # on-policy has different hyper-parameters from off-policy
    if if_on_policy:
        args.agent_rl = agent.AgentGaePPO  # on-policy: AgentPPO, AgentGaePPO
    else:
        args.agent_rl = agent.AgentModSAC  # off-policy: AgentSAC, AgentTD3, AgentDDPG

    env = gym.make('Pendulum-v0')
    env.target_reward = -200  # set target_reward manually for env 'Pendulum-v0'
    args.env = prep_env(env=env)
    args.net_dim = 2**7  # change a default hyper-parameter
    # args.env = prep_env(env=gym.make('LunarLanderContinuous-v2'))
    # args.env = prep_env(env=gym.make('BipedalWalker-v3'))  # recommend
    args.gamma = 0.95
    train_and_evaluate(args)

    """DEMO 3: Custom Continuous action env: FinanceStock-v1"""
    args = Arguments(if_on_policy=True)
    args.agent_rl = agent.AgentGaePPO  # PPO+GAE (on-policy)

    from elegantrl.env import FinanceMultiStockEnv
    args.env = FinanceMultiStockEnv(if_train=True)  # a standard env for ElegantRL, no need for prep_env()
    args.env_eval = FinanceMultiStockEnv(if_train=False)
    args.break_step = int(5e6)  # 5e6 (15e6) UsedTime 3,000s (9,000s)
    args.net_dim = 2**8
    args.target_step = args.env.max_step
    args.max_memo = (args.target_step - 1) * 8
    args.batch_size = 2**11
    args.repeat_times = 2**4
    args.eval_times1 = 2**4
    # train_and_evaluate(args)
    args.rollout_num = 4
    train_and_evaluate__multiprocessing(args)
    args.env_eval.draw_cumulative_return(args, torch)

    '''DEMO 4: PyBullet(MuJoCo) Robot Env'''
    args = Arguments(if_on_policy=True)
    args.agent_rl = agent.AgentGaePPO  # agent.AgentPPO

    import pybullet_envs  # for python-bullet-gym
    dir(pybullet_envs)
    # args.env = prep_env(gym.make('AntBulletEnv-v0'))
    args.env = prep_env(gym.make('ReacherBulletEnv-v0'))
    # args.repeat_times = 8
    # args.max_memo = args.target_step = 4096
    args.break_step = int(4e8)  # (4e4) 8e5, UsedTime: (300s) 700s
    args.if_break_early = False
    args.eval_times1 = 2**2
    args.eval_times2 = 2**4
    args.rollout_num = 4
    train_and_evaluate__multiprocessing(args)

    """DEMO 5: Continuous action env with TD3 and PER: ReacherBulletEnv-v0 of PyBullet"""
    import pybullet_envs  # for python-bullet-gym
    args = Arguments(agent_rl=None, env=None, gpu_id=0)  # see Arguments() for the hyper-parameters
    args.agent_rl = agent.AgentTD3  # choose a DRL algorithm
    args.env = prep_env(env=gym.make('ReacherBulletEnv-v0'))
    args.net_dim = 2**7  # change a default hyper-parameter
    args.if_per = True  # Prioritized Experience Replay
    args.break_step = int(2e20)  # (4e4) 8e5, UsedTime: (300s) 700s
    # train_and_evaluate(args)
    train_and_evaluate__multiprocessing(args)
    exit(0)
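# train_and_evaluate__multiprocessing(args) spawns worker processes, so the
# module must be import-safe: on spawn-based platforms (Windows, macOS) each
# worker re-imports this file, and an unguarded top-level call would launch
# training recursively. A minimal entry point under that assumption:
if __name__ == '__main__':
    run__demo()  # only the parent process reaches this call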
def run__demo():
    # from elegantrl.tutorial.run import Arguments, train_and_evaluate
    from elegantrl.tutorial.env import PreprocessEnv
    import gym
    gym.logger.set_level(40)  # Block warning: 'WARN: Box bound precision lowered by casting to float32'

    """DEMO 1: Discrete action env of gym"""
    args = Arguments(agent=None, env=None, gpu_id=None)  # see Arguments() for the hyper-parameters

    '''choose a DRL algorithm'''
    from elegantrl.tutorial.agent import AgentDoubleDQN  # AgentDQN
    args.agent = AgentDoubleDQN()

    '''choose environment'''
    # args.env = PreprocessEnv(env=gym.make('CartPole-v0'))
    # args.net_dim = 2 ** 7  # change a default hyper-parameter
    # args.batch_size = 2 ** 7
    "TotalStep: 2e3, TargetReward: , UsedTime: 10s"
    args.env = PreprocessEnv(env=gym.make('LunarLander-v2'))
    args.net_dim = 2**8
    args.batch_size = 2**8
    "TotalStep: 6e4, TargetReward: 200, UsedTime: 600s"

    '''train and evaluate'''
    train_and_evaluate(args)
    exit()

    '''DEMO 2: Continuous action env of gym'''
    '''DEMO 2.1: choose an off-policy DRL algorithm'''
    from elegantrl.tutorial.agent import AgentSAC  # AgentTD3, AgentDDPG
    args = Arguments(if_on_policy=False)
    args.agent = AgentSAC()

    '''DEMO 2.2: choose an on-policy DRL algorithm'''
    from elegantrl.tutorial.agent import AgentPPO  # AgentGaePPO
    args = Arguments(if_on_policy=True)  # hyper-parameters of on-policy differ from off-policy
    args.agent = AgentPPO()

    '''choose environment'''
    env = gym.make('Pendulum-v0')
    env.target_reward = -200  # set target_reward manually for env 'Pendulum-v0'
    args.env = PreprocessEnv(env=env)
    args.reward_scale = 2**-3  # RewardRange: -1800 < -200 < -50 < 0
    args.net_dim = 2**7
    args.batch_size = 2**7
    "TotalStep: 4e5, TargetReward: -200, UsedTime: 400s"

    # args.env = PreprocessEnv(env=gym.make('LunarLanderContinuous-v2'))
    # args.reward_scale = 2 ** 0  # RewardRange: -800 < -200 < 200 < 302
    "TotalStep: 9e4, TargetReward: 200, UsedTime: 2500s"

    # args.env = PreprocessEnv(env=gym.make('BipedalWalker-v3'))
    # args.reward_scale = 2 ** 0  # RewardRange: -200 < -150 < 300 < 334
    # args.break_step = int(2e5)
    # args.if_allow_break = False
    "TotalStep: 2e5, TargetReward: 300, UsedTime: 5000s"

    '''train and evaluate'''
    train_and_evaluate(args)
    # train_and_evaluate__multiprocessing(args)  # try multiprocessing in the complete version
    exit()

    '''DEMO 3: Custom Continuous action env: FinanceStock-v1'''
    args = Arguments(if_on_policy=True)

    '''choose a DRL algorithm'''
    from elegantrl.tutorial.agent import AgentPPO
    args.agent = AgentPPO()

    from elegantrl.env import FinanceMultiStockEnv  # a standard env for ElegantRL, no need for PreprocessEnv()
    args.env = FinanceMultiStockEnv(if_train=True, train_beg=0, train_len=1024)
    args.env_eval = FinanceMultiStockEnv(if_train=False, train_beg=0, train_len=1024)  # eva_len = 1699 - train_len
    args.reward_scale = 2**0  # RewardRange: 0 < 1.0 < 1.25 <
    args.break_step = int(5e6)
    args.max_step = args.env.max_step
    args.max_memo = (args.max_step - 1) * 8
    args.batch_size = 2**11
    args.if_allow_break = False
    "TotalStep:  2e5, TargetReward: 1.25, UsedTime:  200s"
    "TotalStep:  4e5, TargetReward: 1.50, UsedTime:  400s"
    "TotalStep: 10e5, TargetReward: 1.62, UsedTime: 1000s"

    '''train and evaluate'''
    train_and_evaluate(args)
    # args.rollout_num = 8
    # train_and_evaluate__multiprocessing(args)  # try multiprocessing in the complete version
    exit()
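# PreprocessEnv wraps a gym env so the trainer can read a uniform interface.
# A hypothetical smoke test of that assumption; the attribute names
# (state_dim, action_dim, if_discrete) are inferred from the demos above and
# may differ between ElegantRL versions:
def check_preprocess_env():
    import gym
    import numpy as np
    from elegantrl.tutorial.env import PreprocessEnv

    env = PreprocessEnv(env=gym.make('Pendulum-v0'))
    print(env.state_dim, env.action_dim, env.if_discrete)

    state = env.reset()
    action = np.zeros(env.action_dim, dtype=np.float32)  # a neutral action in [-1, 1]
    state, reward, done, info = env.step(action)
    print(state.shape, reward, done)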