Example no. 1
def train_ppo():

    env = Manipulator2D()
    env = Monitor(env, log_dir)
    # Custom MLP policy of two layers of size 32 each with tanh activation function
    #policy_kwargs = dict(act_fun=tf.nn.tanh, net_arch=[32, 32])

    # Create the agent
    # env = SubprocVecEnv([make_env(i) for i in range(8)])
    # env = VecMonitor(env, log_dir)
    # model = PPO2(MlpPolicy, env, verbose=1, policy_kwargs=policy_kwargs)
    model = PPO2(MlpPolicy, env, verbose=1, nminibatches=32, noptepochs=10, ent_coef=0.0)
    # Train the agent
    model.learn(total_timesteps=20000000, callback=callback)
    # Save the agent
    model.save("ppo2-mani14")
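Note that train_ppo() refers to a module-level log_dir and callback that this snippet does not define; a minimal sketch of that setup, mirroring the preamble of Example no. 2 below:

import os
from callback import SaveOnBestTrainingRewardCallback

# Directory where Monitor writes episode statistics
log_dir = "./tmp2/"
os.makedirs(log_dir, exist_ok=True)
# Check the mean reward every 1000 steps and keep the best model seen so far
callback = SaveOnBestTrainingRewardCallback(check_freq=1000, log_dir=log_dir)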
Example no. 2
import tensorflow as tf
from manipulator_2d import Manipulator2D
from stable_baselines.common.policies import MlpPolicy
#from stable_baselines.common.policies import LnMlpPolicy
from stable_baselines import PPO2
import os
from callback import SaveOnBestTrainingRewardCallback
from stable_baselines.bench import Monitor

# Log dir
log_dir = "./tmp2/"
os.makedirs(log_dir, exist_ok=True)
callback = SaveOnBestTrainingRewardCallback(check_freq=1000, log_dir=log_dir)

env = Manipulator2D()
env = Monitor(env, log_dir)  # the callback reads episode rewards from these Monitor logs

# Load a previously trained agent from a checkpoint
load_model_path = "tmp/ppo_15207000.zip"
#load_model_path = "ppo2-mani7.zip"
# Restore the weights etc. from the saved training file
model = PPO2.load(load_model_path)
# Attach the environment to the loaded model; PPO2.load does not restore the env,
# so calling learn() without set_env() would fail
model.set_env(env)
model.learn(total_timesteps=16000000, callback=callback)
# Save the agent
model.save("ppo2-mani9")

# del model
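The callback module itself is not shown in these examples. A minimal sketch of SaveOnBestTrainingRewardCallback, assuming it follows the standard stable-baselines callback example (poll the Monitor logs in log_dir every check_freq steps and save the model whenever the mean episode reward improves):

import os

import numpy as np

from stable_baselines.common.callbacks import BaseCallback
from stable_baselines.results_plotter import load_results, ts2xy


class SaveOnBestTrainingRewardCallback(BaseCallback):
    """Save the model when the mean training reward reaches a new best.

    Requires the environment to be wrapped in a Monitor writing to log_dir.
    """

    def __init__(self, check_freq, log_dir, verbose=1):
        super(SaveOnBestTrainingRewardCallback, self).__init__(verbose)
        self.check_freq = check_freq
        self.log_dir = log_dir
        self.save_path = os.path.join(log_dir, "best_model")
        self.best_mean_reward = -np.inf

    def _init_callback(self):
        if self.save_path is not None:
            os.makedirs(self.save_path, exist_ok=True)

    def _on_step(self):
        if self.n_calls % self.check_freq == 0:
            # Mean reward over the last 100 episodes recorded by Monitor
            x, y = ts2xy(load_results(self.log_dir), "timesteps")
            if len(x) > 0:
                mean_reward = np.mean(y[-100:])
                if mean_reward > self.best_mean_reward:
                    self.best_mean_reward = mean_reward
                    self.model.save(self.save_path)
        return True  # returning False would stop training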
Example no. 3
# The outer factory below is an assumed reconstruction: the original snippet
# only preserved the inner _init, but Example no. 1 calls make_env(i)
def make_env(rank, seed=0):
    def _init():
        #env = gym.make(env_id)
        env = Manipulator2D()
        #env = Monitor(env, log_dir)
        env.seed(seed + rank)  # distinct seed per worker keeps rollouts decorrelated
        return env
    return _init
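A hypothetical usage of this factory, matching the commented-out vectorized setup in Example no. 1 (SubprocVecEnv runs each environment copy in its own process):

from stable_baselines import PPO2
from stable_baselines.common.policies import MlpPolicy
from stable_baselines.common.vec_env import SubprocVecEnv

if __name__ == "__main__":  # guard required because SubprocVecEnv forks worker processes
    env = SubprocVecEnv([make_env(i) for i in range(8)])
    model = PPO2(MlpPolicy, env, verbose=1, nminibatches=32, noptepochs=10, ent_coef=0.0)
    model.learn(total_timesteps=20000000)

With SubprocVecEnv, reward logging is done by wrapping each worker with Monitor inside _init (the commented-out line above), since every subprocess constructs its own environment instance.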