Example #1
# Imports implied by the code below; StockTradingEnv and cmd_parse are
# project-local helpers that are not shown in this snippet.
from pathlib import Path

import numpy as np
import pandas as pd
from stable_baselines import PPO2
from stable_baselines.common.policies import MlpPolicy
from stable_baselines.common.vec_env import DummyVecEnv


def main():

    cmd_parser = cmd_parse()
    options = cmd_parser.parse_args()

    ## Get the Stock Ticker data ##
    # print("The Stock ticker used here is ", options.ticker)

    ticker_csv = Path("./data/" + options.ticker + ".csv")
    if ticker_csv.is_file():
        print("Loading ticker data from:", ticker_csv)
        df = pd.read_csv(ticker_csv)
        df = df.sort_values('Date')
    else:
        print("Data file for ticker does not exist. "
              "Please download data first to", ticker_csv)
        # Without the data file there is nothing to train on.
        return
    training_logs_path = options.output_file + "_training_logs.csv"
    eval_logs_path = options.output_file + "_eval_logs"

    ## Get the training set size ##
    print("The options.training_set_size is ", options.training_set_size)

    ## Get the number of look back days ##
    print("The options.look-back-days here is: ", options.look_back_days)

    ## Get the model we are using to train the agent ##
    print("The model to train the agent here is: ", options.model)

    # The algorithms require a vectorized environment to run
    env = DummyVecEnv([
        lambda: StockTradingEnv(df, options.look_back_days,
                                options.training_set_size, eval_logs_path)
    ])

    if options.model == "PPO2":
        model = PPO2(MlpPolicy, env, verbose=1)
    else:
        raise ValueError("Unsupported model: " + options.model)
    model.learn(total_timesteps=options.training_set_size)

    # training_rewards is not a standard stable-baselines attribute; it is
    # assumed to be collected by a customized PPO2/StockTradingEnv.
    np.savetxt(training_logs_path, model.training_rewards, delimiter=",")
    obs = env.reset()
    for i in range(options.training_set_size, len(df['Date'])):
        action, _states = model.predict(obs)
        obs, rewards, done, info = env.step(action)
        env.render(title=options.ticker)
    env.close()
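
Example #1 calls a cmd_parse() helper that is not included in the snippet. A minimal sketch of what such a parser might look like, with the option names inferred from the attributes used above (defaults are purely illustrative):

import argparse


def cmd_parse():
    # Hypothetical reconstruction of the parser consumed by main() above
    parser = argparse.ArgumentParser(
        description="Train a PPO2 stock-trading agent")
    parser.add_argument("--ticker", default="MSFT",
                        help="ticker symbol; expects ./data/<ticker>.csv")
    parser.add_argument("--output-file", default="run",
                        help="prefix for the training/eval log files")
    parser.add_argument("--training-set-size", type=int, default=1000)
    parser.add_argument("--look-back-days", type=int, default=90)
    parser.add_argument("--model", default="PPO2")
    return parser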
Example #2
    def __init__(self,
                 env_index: int = 0,
                 max_time_step_per_robot=1000,
                 model_path="./cnn",
                 feature_extractor="cnn"):
        self.env = make_vec_env(DoomEnv,
                                n_envs=1,
                                vec_env_cls=DummyVecEnv,
                                env_kwargs={
                                    "display": False,
                                    "feature": feature_extractor,
                                    "env_index": env_index
                                })

        policy_kwargs = dict(feature_extraction=feature_extractor)
        self.model = PPO2(PPOPolicy,
                          self.env,
                          policy_kwargs=policy_kwargs,
                          gamma=0.99,
                          n_steps=max_time_step_per_robot,
                          ent_coef=0.01,
                          learning_rate=2.5e-4,
                          vf_coef=0.5,
                          max_grad_norm=0.5,
                          lam=0.95,
                          nminibatches=4,
                          noptepochs=100,
                          cliprange=0.2,
                          cliprange_vf=None,
                          verbose=1,
                          tensorboard_log="./tensorboard",
                          _init_setup_model=True,
                          full_tensorboard_log=True,
                          seed=None,
                          n_cpu_tf_sess=None)

        self.model_path = model_path
        if path.exists(self.model_path):
            # PPO2.load is a classmethod that returns a new model, so the
            # result has to be assigned back rather than called on the instance
            self.model = PPO2.load(self.model_path, env=self.env)
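
Only __init__ is shown here. Assuming the class also exposes a training entry point, it could be as small as this sketch (the method name and timestep count are assumptions; learn and save are standard stable-baselines calls):

    def train(self, total_timesteps=100000):
        # Train the PPO2 model built in __init__ and persist it to the same
        # path that __init__ tries to load from
        self.model.learn(total_timesteps=total_timesteps)
        self.model.save(self.model_path)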
Example #3

from envs.WheeledRobotPybulletEnv import WheeledRobotPybulletEnv
from stable_baselines.ppo2.ppo2 import PPO2
from stable_baselines.common.vec_env import DummyVecEnv
import matplotlib.pyplot as plt

raw_env = WheeledRobotPybulletEnv(decision_interval=1,
                                  use_GUI=True,
                                  num_episode_steps=5)
# Optional: PPO2 requires a vectorized environment to run
# the env is now wrapped automatically when passing it to the constructor
vec_env = DummyVecEnv([lambda: raw_env])

dir_name = "results\LearningResults\PPO_WheeledRobotPybullet"
tensorboard_dir = dir_name + "\\tensorboard"
model_dir = dir_name + "\\model"
model = PPO2.load(model_dir, vec_env)
# model.learn(total_timesteps=100, tb_log_name="test")
# model.save(model_dir)

env = vec_env.envs[0]
obs_prev = env.reset()

x_poss = [env.snake_robot.x]
y_poss = [env.snake_robot.y]
thetas = [env.snake_robot.theta]
times = [0]
a1s = [env.snake_robot.a1]
a2s = [env.snake_robot.a2]
a1dots = [env.snake_robot.a1dot]
a2dots = [env.snake_robot.a2dot]
# robot_params = []
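
The rollout loop is cut off in this example. Following the same pattern as Example #9 below, it presumably continues roughly like this (a sketch; the step count is arbitrary):

for i in range(100):
    action, _states = model.predict(obs_prev)
    obs_prev, rewards, dones, info = env.step(action)
    # Record the trajectory for later plotting
    x_poss.append(env.snake_robot.x)
    y_poss.append(env.snake_robot.y)
    thetas.append(env.snake_robot.theta)
    times.append(times[-1] + 1)
    a1s.append(env.snake_robot.a1)
    a2s.append(env.snake_robot.a2)
    a1dots.append(env.snake_robot.a1dot)
    a2dots.append(env.snake_robot.a2dot)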
Example #4
# Imports implied by the code below
import gym_super_mario_bros
from nes_py.wrappers import BinarySpaceToDiscreteSpaceEnv
from stable_baselines import PPO2
from stable_baselines.common.policies import CnnPolicy
from stable_baselines.common.vec_env import DummyVecEnv

# Button combinations exposed to the agent as the discrete action space
movements = [
    ['A'],
    ['B'],
    ['right'],
    ['right', 'A'],
    ['right', 'B'],
    ['right', 'A', 'B'],
    ['left'],
    ['left', 'A'],
    ['left', 'B'],
    ['left', 'A', 'B'],
    #    ['down'],
    #    ['up']
]

_env = gym_super_mario_bros.make('SuperMarioBros-v0')
#_env = gym_super_mario_bros.SuperMarioBrosEnv(frames_per_step=1, rom_mode='rectangle')
env = BinarySpaceToDiscreteSpaceEnv(_env, movements)
env = DummyVecEnv([lambda: env])
model = PPO2(policy=CnnPolicy, env=env, verbose=1)
model.learn(total_timesteps=10000)

obs = env.reset()

while True:
    action, _states = model.predict(obs)  # second return value is the RNN state

    obs, rewards, dones, info = env.step(action)
    print("학습끝")
    print(rewards)
    env.render()
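
The trained Mario model is never persisted in this example. Saving and reloading it uses the standard stable-baselines calls (the file name is just an example):

model.save("ppo2_super_mario")              # writes ppo2_super_mario.zip (.pkl on older releases)
model = PPO2.load("ppo2_super_mario", env=env)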
Example #5

from envs.WheeledRobotPybulletEnv import WheeledRobotPybulletEnv
from stable_baselines.common.policies import MlpPolicy
from stable_baselines.ppo2.ppo2 import PPO2
from stable_baselines.common.vec_env import DummyVecEnv
import matplotlib.pyplot as plt

raw_env = WheeledRobotPybulletEnv(decision_interval=.5, use_GUI=True)
# Optional: PPO2 requires a vectorized environment to run
# the env is now wrapped automatically when passing it to the constructor
vec_env = DummyVecEnv([lambda: raw_env])  # run multi agents

# Training
dir_name = "results\LearningResults\PPO_WheeledRobotPybullet"
tensorboard_dir = dir_name + "\\tensorboard"
model_dir = dir_name + "\\model"
model = PPO2(MlpPolicy,
             vec_env,
             verbose=1,
             tensorboard_log=tensorboard_dir,
             full_tensorboard_log=True)
model.learn(total_timesteps=20000, tb_log_name="test")  # specify a run name for TensorBoard
model.save(model_dir)

# Tensor Board --> cmd --> (DeepRobots) C:\Users\Jesse\Desktop\DeepRobots>tensorboard --logdir results\LearningResults\PPO_WheeledRobotPybullet

# Policy Rollout
env = vec_env.envs[0]
obs_prev = env.reset()

x_poss = [env.snake_robot.x]
y_poss = [env.snake_robot.y]
thetas = [env.snake_robot.theta]
times = [0]
Example #6
# Imports implied by the code below; load_train_env is a project-local helper
# that is not shown in this snippet.
import time

import numpy as np
import rospy
from stable_baselines import PPO2


def run_ppo(config,
            state_collector,
            agent_name="ppo_99_8507750",
            policy="CnnPolicy",
            mode="train",
            task_mode="static",
            stack_offset=15,
            num_stacks=1,
            debug=True,
            normalize=True,
            disc_action_space=False,
            ns=""):

    path_to_models = config['PATHES']['path_to_models']

    # Loading agent
    model = PPO2.load("%s/%s/%s.pkl" %
                      (path_to_models, agent_name, agent_name))

    print("Loaded %s" % agent_name)
    print("--------------------------------------------------")
    print("Normalize: ", normalize)
    print("Policy: %s" % policy)
    print("Discrete action space: ", disc_action_space)
    print("Observation space size: ", model.observation_space.shape)
    print("Debug: ", debug)
    print("Number of stacks: %d, stack offset: %d" %
          (model.observation_space.shape[2], stack_offset))
    print("\n")

    #Loading environment
    env = load_train_env(ns, state_collector, 0.46, 19, num_stacks,
                         stack_offset, debug, task_mode, mode, policy,
                         disc_action_space, normalize)

    # Resetting environment
    if mode == "train" or mode == "eval":
        obs = env.reset()
    if mode == "exec" or mode == "exec_rw":
        if disc_action_space:
            obs, rewards, dones, info = env.step([5])
        else:
            obs, rewards, dones, info = env.step([[0.0, 0.0]])

    if debug:
        # Cumulative reward
        cum_reward = 0
    while True:
        #Determining action for given observation
        action, _states = model.predict(obs)

        # Clipping actions
        if not disc_action_space:
            action = np.maximum(np.minimum(model.action_space.high, action),
                                model.action_space.low)

        #Executing action in environment
        obs, rewards, dones, info = env.step(action)

        if debug:
            cum_reward += rewards

            # Episode over?
            if dones:
                print("Episode finished with reward of %f." % cum_reward)
                cum_reward = 0

        time.sleep(0.0001)
        if rospy.is_shutdown():
            print('shutdown')
            break
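
The nested np.maximum/np.minimum above is an element-wise clip to the action-space bounds; np.clip expresses the same operation more directly:

action = np.clip(action, model.action_space.low, model.action_space.high)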
Example #7
    def load(self, path=None):
        if path is None:
            path = self.model_path
        model = PPO2.load(path, env=self.env)
Example #8
        S = s_Dyymmdd_HHMM
    """
    import datetime
    date = datetime.datetime.now().strftime('%y%m%d_%H%M%S')
    return s + '_D' + date


# Imports implied by the code below; the env import path follows Example #9
import os

from envs.IdealFluidSwimmerWithSpringEnv import IdealFluidSwimmerWithSpringEnv
from stable_baselines import PPO2
from stable_baselines.common.vec_env import DummyVecEnv

raw_env = IdealFluidSwimmerWithSpringEnv()
# Optional: PPO2 requires a vectorized environment to run
# the env is now wrapped automatically when passing it to the constructor
vec_env = DummyVecEnv([lambda: raw_env])

results_dir = '../results/PPO_IdealFluidSwimmerWithSpringFixedA1ddot/trial_1.22_D210122_163220'
model_path = os.path.join(results_dir, "model")
model = PPO2.load(model_path, vec_env)

env = vec_env.envs[0]
obs_prev = env.reset()

x_poss = [env.snake_robot.x]
y_poss = [env.snake_robot.y]
thetas = [env.snake_robot.theta]
times = [0]
a1s = [env.snake_robot.a1]
a2s = [env.snake_robot.a2]
a1dots = [env.snake_robot.a1dot]
a2dots = [env.snake_robot.a2dot]
a1ddots = [env.snake_robot.a1ddot]
ks = [env.snake_robot.k]
cs = [env.snake_robot.c]
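
The rollout and plotting code is cut off here as well. Once the recorded lists have been filled, a minimal matplotlib sketch for inspecting the trajectory might look like this (entirely illustrative; the original plotting code is not shown):

import matplotlib.pyplot as plt

plt.plot(times, x_poss, label="x")
plt.plot(times, y_poss, label="y")
plt.plot(times, thetas, label="theta")
plt.xlabel("time step")
plt.legend()
plt.show()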
Example #9
import gym

from envs.IdealFluidSwimmerWithSpringEnv import IdealFluidSwimmerWithSpringEnv
from stable_baselines.common.policies import MlpPolicy
from stable_baselines.ppo2.ppo2 import PPO2
import matplotlib.pyplot as plt

env = IdealFluidSwimmerWithSpringEnv()

tensorboard_dir = "results/LearningResults/PPO_IdealFluidSwimmerWithSpring"
model = PPO2(MlpPolicy, env, verbose=1, tensorboard_log=tensorboard_dir)
model.learn(total_timesteps=25, tb_log_name="trial_run")

obs_prev = env.reset()

x_poss = [env.swimmer.x]
y_poss = [env.swimmer.y]
thetas = [env.swimmer.theta]
times = [0]
a1s = [env.swimmer.a1]
a2s = [env.swimmer.a2]
a1dots = [env.swimmer.a1dot]
a2dots = [env.swimmer.a2dot]
a1ddots = [env.swimmer.a1ddot]
ks = [env.swimmer.k]
cs = [env.swimmer.c]
# robot_params = []
for i in range(100):
    x_prev = env.swimmer.x
    action, _states = model.predict(obs_prev)
    obs, rewards, dones, info = env.step(action)