import os

import tensorflow as tf

from manipulator_2d import Manipulator2D
from stable_baselines import PPO2
from stable_baselines.bench import Monitor
from stable_baselines.common.policies import MlpPolicy
# from stable_baselines.common.policies import LnMlpPolicy

from callback import SaveOnBestTrainingRewardCallback

# Log dir
log_dir = "./tmp2/"
os.makedirs(log_dir, exist_ok=True)

callback = SaveOnBestTrainingRewardCallback(check_freq=1000, log_dir=log_dir)


def train_ppo():
    """Train a PPO2 agent on Manipulator2D from scratch."""
    env = Manipulator2D()
    env = Monitor(env, log_dir)

    # Custom MLP policy of two layers of size 32 each with tanh activation:
    # policy_kwargs = dict(act_fun=tf.nn.tanh, net_arch=[32, 32])
    # model = PPO2(MlpPolicy, env, verbose=1, policy_kwargs=policy_kwargs)

    # Alternatively, train on 8 parallel environments:
    # env = SubprocVecEnv([make_env(i) for i in range(8)])
    # env = VecMonitor(env, log_dir)

    # Create the agent
    model = PPO2(MlpPolicy, env, verbose=1, nminibatches=32, noptepochs=10, ent_coef=0.0)

    # Train the agent
    model.learn(total_timesteps=20000000, callback=callback)

    # Save the agent
    model.save("ppo2-mani14")


def continue_training():
    """Resume training from a previously saved checkpoint."""
    # Create the Gym environment; the Monitor wrapper is required so the
    # callback can read episode rewards from the log dir
    env = Manipulator2D()
    env = Monitor(env, log_dir)

    # Load weights etc. from the saved training file
    load_model_path = "tmp/ppo_15207000.zip"
    # load_model_path = "ppo2-mani7.zip"
    model = PPO2.load(load_model_path)

    # PPO2.load() does not restore the environment, so attach a fresh one
    model.set_env(env)

    model.learn(total_timesteps=16000000, callback=callback)

    # Save the agent
    model.save("ppo2-mani9")
    # del model
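# SaveOnBestTrainingRewardCallback is imported from a local callback.py that is
# not shown here. A minimal sketch of such a callback, following the
# stable-baselines docs example; the actual contents of callback.py may differ:
#
#   import numpy as np
#   from stable_baselines.common.callbacks import BaseCallback
#   from stable_baselines.results_plotter import load_results, ts2xy
#
#   class SaveOnBestTrainingRewardCallback(BaseCallback):
#       def __init__(self, check_freq, log_dir, verbose=1):
#           super(SaveOnBestTrainingRewardCallback, self).__init__(verbose)
#           self.check_freq = check_freq
#           self.log_dir = log_dir
#           self.save_path = os.path.join(log_dir, "best_model")
#           self.best_mean_reward = -np.inf
#
#       def _on_step(self):
#           if self.n_calls % self.check_freq == 0:
#               # Mean episodic reward over the last 100 episodes in the Monitor logs
#               x, y = ts2xy(load_results(self.log_dir), "timesteps")
#               if len(x) > 0:
#                   mean_reward = np.mean(y[-100:])
#                   if mean_reward > self.best_mean_reward:
#                       self.best_mean_reward = mean_reward
#                       self.model.save(self.save_path)
#           return True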
def make_env(rank, seed=0):
    """Return a thunk that builds a seeded Manipulator2D env (for SubprocVecEnv)."""
    def _init():
        # env = gym.make(env_id)
        env = Manipulator2D()
        # env = Monitor(env, log_dir)
        env.seed(seed + rank)
        return env
    return _init
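# A minimal sketch of how these pieces could be wired together when the file
# is run as a script; the entry-point choice and the vectorized variant are
# assumptions, not part of the original code. SubprocVecEnv needs the
# __main__ guard because it launches worker processes.
if __name__ == "__main__":
    # Train from scratch on a single monitored environment:
    train_ppo()

    # Or resume from a saved checkpoint:
    # continue_training()

    # Or train on 8 parallel environments:
    # from stable_baselines.common.vec_env import SubprocVecEnv
    # env = SubprocVecEnv([make_env(i) for i in range(8)])
    # model = PPO2(MlpPolicy, env, verbose=1, nminibatches=32, noptepochs=10, ent_coef=0.0)
    # model.learn(total_timesteps=20000000)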