Example #1
def demo3_custom_env_fin_rl():
    """DEMO 3: Custom Continuous action env: FinanceStock-v1"""
    import elegantrl.agent as agent

    args = Arguments(if_on_policy=True)
    args.agent = agent.AgentGaePPO()  # PPO+GAE (on-policy)

    from elegantrl.env import FinanceMultiStockEnv  # a standard env for ElegantRL, no need for PreprocessEnv()
    args.env = FinanceMultiStockEnv(if_train=True, train_beg=0, train_len=1024)
    args.env_eval = FinanceMultiStockEnv(if_train=False, train_beg=0, train_len=1024)  # eva_len = 1699 - train_len
    args.reward_scale = 2 ** 0  # RewardRange: 0 < 1.0 < 1.25 <
    args.break_step = int(5e6)
    args.net_dim = 2 ** 8
    args.max_step = args.env.max_step
    args.max_memo = (args.max_step - 1) * 8
    args.batch_size = 2 ** 11
    args.repeat_times = 2 ** 4
    args.eval_times1 = 2 ** 2
    args.eval_times2 = 2 ** 4
    args.if_break_early = False
    "TotalStep: 2e5, TargetReward: 1.25, UsedTime: 200s"
    "TotalStep: 4e5, TargetReward: 1.50, UsedTime: 400s"

    # train_and_evaluate(args)
    args.rollout_num = 8
    train_and_evaluate__multiprocessing(args)
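A note on running the snippet above: Arguments and train_and_evaluate__multiprocessing are used but never imported here, so the function relies on module-level imports in its source file. A minimal driver sketch, assuming those helpers live in elegantrl.main (the import path shown commented out in Example #4 below; adjust it to your ElegantRL version):

# Hypothetical driver; the import path is an assumption, not part of the snippet above.
from elegantrl.main import Arguments, train_and_evaluate, train_and_evaluate__multiprocessing

if __name__ == '__main__':
    demo3_custom_env_fin_rl()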
Example #2
def demo3_custom_env_fin_rl():
    from elegantrl.agent import AgentGaePPO
    '''choose a DRL algorithm'''
    args = Arguments(if_on_policy=True)
    args.agent = AgentGaePPO()  # PPO+GAE (on-policy)

    from elegantrl.env import FinanceMultiStockEnv  # a standard env for ElegantRL, no need for PreprocessEnv()
    args.env = FinanceMultiStockEnv(if_train=True, train_beg=0, train_len=1024)
    args.env_eval = FinanceMultiStockEnv(
        if_train=False, train_beg=0,
        train_len=1024)  # eva_len = 1699 - train_len
    args.reward_scale = 2**0  # RewardRange: 0 < 1.0 < 1.25 <
    args.break_step = int(5e6)
    args.net_dim = 2**8
    args.max_step = args.env.max_step
    args.max_memo = (args.max_step - 1) * 8
    args.batch_size = 2**11
    args.repeat_times = 2**4
    args.eval_times1 = 2**2
    args.eval_times2 = 2**4
    args.if_allow_break = False
    "TotalStep:  2e5, TargetReward: 1.25, UsedTime:  200s"
    "TotalStep:  4e5, TargetReward: 1.50, UsedTime:  400s"
    "TotalStep: 10e5, TargetReward: 1.62, UsedTime: 1000s"
    '''train and evaluate'''
    # train_and_evaluate(args)
    args.rollout_num = 8
    train_and_evaluate__multiprocessing(args)
Example #3
def run__demo():
    import gym
    gym.logger.set_level(40)  # Block warning: 'WARN: Box bound precision lowered by casting to float32'
    from elegantrl.env import decorate_env, FinanceMultiStockEnv

    args = Arguments(agent_rl=None, env=None, gpu_id=None)  # see Arguments() for the default hyper-parameters

    '''DEMO 1: Discrete action env: CartPole-v0 of gym'''
    from elegantrl.agent import AgentDoubleDQN
    args.agent_rl = AgentDoubleDQN  # choose a DRL algorithm
    args.env = decorate_env(env=gym.make('CartPole-v0'))
    args.net_dim = 2 ** 7  # change a default hyper-parameter
    # args.env = decorate_env(env=gym.make('LunarLander-v2'))  # alternative discrete-action env
    # args.net_dim = 2 ** 8
    train_and_evaluate(args)
    exit()

    '''DEMO 2: Continuous action env: gym.box2D'''
    from elegantrl.agent import AgentSAC  # AgentTD3
    args.agent_rl = AgentSAC  # off-policy
    env = gym.make('Pendulum-v0')
    env.target_reward = -200  # set target_reward manually for env 'Pendulum-v0'
    args.env = decorate_env(env=env)
    # args.env = decorate_env(env=gym.make('LunarLanderContinuous-v2'))
    # args.env = decorate_env(env=gym.make('BipedalWalker-v3'))  # recommend args.gamma = 0.95
    train_and_evaluate(args)
    exit()

    from elegantrl.agent import AgentGaePPO  # AgentPPO
    args = Arguments(if_on_policy=True)  # on-policy has different hyper-parameters from off-policy
    args.agent_rl = AgentGaePPO  # on-policy
    env = gym.make('Pendulum-v0')
    env.target_reward = -200  # set target_reward manually for env 'Pendulum-v0'
    args.env = decorate_env(env=env)
    # args.env = decorate_env(env=gym.make('LunarLanderContinuous-v2'))
    # args.env = decorate_env(env=gym.make('BipedalWalker-v3'))  # recommend args.gamma = 0.95
    train_and_evaluate(args)
    exit()

    '''DEMO 3: Custom Continuous action env: FinanceStock-v1'''
    args = Arguments(if_on_policy=True)
    from elegantrl.agent import AgentGaePPO  # AgentPPO
    args.agent_rl = AgentGaePPO  # PPO+GAE (on-policy)
    from elegantrl.env import FinanceMultiStockEnv
    args.env = FinanceMultiStockEnv()  # a standard env for ElegantRL, no need for decorate_env()

    args.break_step = int(5e6 * 4)  # 5e6 (15e6) UsedTime 3,000s (9,000s)
    args.net_dim = 2 ** 8
    args.max_step = 1699
    args.max_memo = (args.max_step - 1) * 16
    args.batch_size = 2 ** 11
    args.repeat_times = 2 ** 4
    train_and_evaluate(args)
    exit()
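The demos above wrap gym environments with decorate_env and then rely on attributes such as env.max_step and env.target_reward. A rough sketch of what such a wrapper is expected to provide, based only on the attributes these snippets read; the class name and defaults are illustrative, not the library's actual implementation:

import gym
import numpy as np

class MinimalPreprocessEnv(gym.Wrapper):  # hypothetical stand-in for decorate_env / PreprocessEnv
    def __init__(self, env, target_reward=None, max_step=1000):
        super().__init__(env)
        # keep a manually set target_reward (as done for 'Pendulum-v0' above) if the env has one
        self.target_reward = getattr(env, 'target_reward', target_reward)
        self.max_step = max_step  # episode length the demos use when sizing max_memo

    def reset(self):
        # cast observations to float32, matching the gym warning the demos silence
        return np.asarray(self.env.reset(), dtype=np.float32)

    def step(self, action):
        state, reward, done, info = self.env.step(action)
        return np.asarray(state, dtype=np.float32), reward, done, info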
Example #4
def run__demo():
    import elegantrl.BetaWarning.agent as agent
    from elegantrl.BetaWarning.env import prep_env
    # from elegantrl.main import Arguments, train_and_evaluate, train_and_evaluate__multiprocessing
    import gym
    import torch  # used by env_eval.draw_cumulative_return() below

    gym.logger.set_level(
        40
    )  # Block warning: 'WARN: Box bound precision lowered by casting to float32'
    """DEMO 1: Discrete action env: CartPole-v0 of gym"""
    args = Arguments(agent_rl=None, env=None,
                     gpu_id=None)  # see Arguments() for the default hyper-parameters

    args.agent_rl = agent.AgentD3QN  # choose a DRL algorithm
    args.env = prep_env(env=gym.make('CartPole-v0'))
    args.net_dim = 2**7  # change a default hyper-parameter
    # args.env = prep_env(env=gym.make('LunarLander-v2'))
    # args.net_dim = 2 ** 8  # change a default hyper-parameter

    train_and_evaluate(args)
    """DEMO 2: Continuous action env, gym.Box2D"""
    if_on_policy = False
    args = Arguments(
        if_on_policy=if_on_policy
    )  # on-policy has different hyper-parameters from off-policy
    if if_on_policy:
        args.agent_rl = agent.AgentGaePPO  # on-policy: AgentPPO, AgentGaePPO
    else:
        args.agent_rl = agent.AgentModSAC  # off-policy: AgentSAC, AgentModPPO, AgentTD3, AgentDDPG

    env = gym.make('Pendulum-v0')
    env.target_reward = -200  # set target_reward manually for env 'Pendulum-v0'
    args.env = prep_env(env=env)
    args.net_dim = 2**7  # change a default hyper-parameter
    # args.env = prep_env(env=gym.make('LunarLanderContinuous-v2'))
    # args.env = prep_env(env=gym.make('BipedalWalker-v3'))  # recommend args.gamma = 0.95

    train_and_evaluate(args)
    """DEMO 3: Custom Continuous action env: FinanceStock-v1"""
    args = Arguments(if_on_policy=True)
    args.agent_rl = agent.AgentGaePPO  # PPO+GAE (on-policy)

    from elegantrl.env import FinanceMultiStockEnv
    args.env = FinanceMultiStockEnv(
        if_train=True)  # a standard env for ElegantRL, no need for decorate_env()
    args.env_eval = FinanceMultiStockEnv(if_train=False)
    args.break_step = int(5e6)  # 5e6 (15e6) UsedTime 3,000s (9,000s)
    args.net_dim = 2**8
    args.target_step = args.env.max_step
    args.max_memo = (args.target_step - 1) * 8
    args.batch_size = 2**11
    args.repeat_times = 2**4
    args.eval_times1 = 2**4

    # train_and_evaluate(args)
    args.rollout_num = 4
    train_and_evaluate__multiprocessing(args)
    args.env_eval.draw_cumulative_return(args, torch)
    '''DEMO 4: PyBullet(MuJoCo) Robot Env'''
    args = Arguments(if_on_policy=True)
    args.agent_rl = agent.AgentGaePPO  # agent.AgentPPO

    import pybullet_envs  # importing registers the PyBullet envs with gym
    dir(pybullet_envs)  # touch the module so the 'unused' import is kept
    # args.env = prep_env(gym.make('AntBulletEnv-v0'))
    args.env = prep_env(gym.make('ReacherBulletEnv-v0'))
    # args.repeat_times=8
    # args.max_memo=args.target_step =4096

    args.break_step = int(4e8)  # (4e4) 8e5, UsedTime: (300s) 700s
    args.if_break_early = False
    args.eval_times1 = 2**2
    args.eval_times2 = 2**4

    args.rollout_num = 4
    train_and_evaluate__multiprocessing(args)

    # """DEMO 5: Discrete action env: CartPole-v0 of gym"""
    import pybullet_envs  # for python-bullet-gym
    args = Arguments(agent_rl=None, env=None,
                     gpu_id=0)  # see Arguments() for the default hyper-parameters
    args.agent_rl = agent.AgentTD3  # choose a DRL algorithm
    args.env = prep_env(env=gym.make('ReacherBulletEnv-v0'))
    args.net_dim = 2**7  # change a default hyper-parameter
    args.if_per = True
    args.break_step = int(2e20)  # (4e4) 8e5, UsedTime: (300s) 700s

    # train_and_evaluate(args)
    train_and_evaluate__multiprocessing(args)
    exit(0)
Example #5
def run__demo():
    # from elegantrl.tutorial.run import Arguments, train_and_evaluate
    from elegantrl.tutorial.env import PreprocessEnv

    import gym
    gym.logger.set_level(
        40
    )  # Block warning: 'WARN: Box bound precision lowered by casting to float32'
    """DEMO 1: Discrete action env of gym"""
    args = Arguments(agent=None, env=None,
                     gpu_id=None)  # see Arguments() for the default hyper-parameters
    '''choose a DRL algorithm'''
    from elegantrl.tutorial.agent import AgentDoubleDQN  # AgentDQN
    args.agent = AgentDoubleDQN()
    '''choose environment'''
    # args.env = PreprocessEnv(env=gym.make('CartPole-v0'))
    # args.net_dim = 2 ** 7  # change a default hyper-parameter
    # args.batch_size = 2 ** 7
    "TotalStep: 2e3, TargetReward: , UsedTime: 10s"
    args.env = PreprocessEnv(env=gym.make('LunarLander-v2'))
    args.net_dim = 2**8
    args.batch_size = 2**8
    "TotalStep: 6e4, TargetReward: 200, UsedTime: 600s"
    '''train and evaluate'''
    train_and_evaluate(args)
    exit()
    '''DEMO 2: Continuous action env of gym'''
    '''DEMO 2.1: choose an off-policy DRL algorithm'''
    from elegantrl.tutorial.agent import AgentSAC  # AgentTD3, AgentDDPG
    args = Arguments(if_on_policy=False)
    args.agent = AgentSAC()
    '''DEMO 2.2: choose an on-policy DRL algorithm'''
    from elegantrl.tutorial.agent import AgentPPO  # AgentGaePPO
    args = Arguments(
        if_on_policy=True
    )  # hyper-parameters of on-policy are different from off-policy
    args.agent = AgentPPO()
    '''choose environment'''
    env = gym.make('Pendulum-v0')
    env.target_reward = -200  # set target_reward manually for env 'Pendulum-v0'
    args.env = PreprocessEnv(env=env)
    args.reward_scale = 2**-3  # RewardRange: -1800 < -200 < -50 < 0
    args.net_dim = 2**7
    args.batch_size = 2**7
    "TotalStep: 4e5, TargetReward: -200, UsedTime: 400s"
    # args.env = PreprocessEnv(env=gym.make('LunarLanderContinuous-v2'))
    # args.reward_scale = 2 ** 0  # RewardRange: -800 < -200 < 200 < 302
    "TotalStep: 9e4, TargetReward: 200, UsedTime: 2500s"
    # args.env = PreprocessEnv(env=gym.make('BipedalWalker-v3'))
    # args.reward_scale = 2 ** 0  # RewardRange: -200 < -150 < 300 < 334
    # args.break_step = int(2e5)
    # args.if_allow_break = False
    "TotalStep: 2e5, TargetReward: 300, UsedTime: 5000s"
    '''train and evaluate'''
    train_and_evaluate(args)
    # train_and_evaluate__multiprocessing(args)  # try multiprocessing in the complete version
    exit()
    '''DEMO 3: Custom Continuous action env: FinanceStock-v1'''
    args = Arguments(if_on_policy=True)
    '''choose a DRL algorithm'''
    from elegantrl.tutorial.agent import AgentPPO
    args.agent = AgentPPO()

    from elegantrl.env import FinanceMultiStockEnv  # a standard env for ElegantRL, no need for PreprocessEnv()
    args.env = FinanceMultiStockEnv(if_train=True, train_beg=0, train_len=1024)
    args.env_eval = FinanceMultiStockEnv(
        if_train=False, train_beg=0,
        train_len=1024)  # eva_len = 1699 - train_len
    args.reward_scale = 2**0  # RewardRange: 0 < 1.0 < 1.25 <
    args.break_step = int(5e6)
    args.max_step = args.env.max_step
    args.max_memo = (args.max_step - 1) * 8
    args.batch_size = 2**11
    args.if_allow_break = False
    "TotalStep:  2e5, TargetReward: 1.25, UsedTime:  200s"
    "TotalStep:  4e5, TargetReward: 1.50, UsedTime:  400s"
    "TotalStep: 10e5, TargetReward: 1.62, UsedTime: 1000s"
    '''train and evaluate'''
    train_and_evaluate(args)
    # args.rollout_num = 8
    # train_and_evaluate__multiprocessing(args)  # try multiprocessing in the complete version
    exit()
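Several demos set args.reward_scale to a power of two so that scaled rewards land in a numerically friendly range; the "RewardRange" comments list raw episode-return landmarks. A tiny illustration of the idea, independent of ElegantRL's internals:

# Illustrative only: how reward_scale = 2 ** -3 maps Pendulum-v0's raw return landmarks.
raw_episode_returns = [-1800, -200, -50, 0]  # landmarks from the RewardRange comment above
reward_scale = 2 ** -3
print([r * reward_scale for r in raw_episode_returns])  # [-225.0, -25.0, -6.25, 0.0]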