Python GaussianPol.dp_run示例

编程语言: Python

命名空间/包名称: machina.pols

类/类型: GaussianPol

方法/功能: dp_run

hotexamples.com的示例: 3

Python GaussianPol.dp_run - 已找到3个示例。以下是从开源项目中提取的、评价最高的 machina.pols.GaussianPol.dp_run 真实 Python 代码示例。您可以为示例评分，以帮助我们提高示例质量。

常用方法

显示/隐藏

GaussianPol(28)

state_dict(4)

dp_run(3)

load_state_dict(3)

parameters(2)

deterministic_ac_real(1)

reset(1)

to(1)

示例#1

显示文件

# Observation space: 4 dimensions, unbounded in both directions, float32.
high = np.inf * np.ones(4)
observation_space = Box(low=-high, high=high, dtype=np.float32)
# Action space: a single dimension bounded to [-1, 1].
high = np.ones(1)
action_space = Box(low=-high, high=high)

# Recurrent (LSTM) policy network wrapped in a Gaussian policy.
pol_net = PolNetLSTM(observation_space, action_space, h_size=args.h_size, cell_size=args.cell_size)
pol = GaussianPol(observation_space, action_space, pol_net, data_parallel=args.data_parallel, rnn=True)

# Saved policy weights are mandatory for this script: fail early with an
# explanatory message instead of an anonymous, message-less Exception.
if not args.pol:
    raise ValueError('args.pol is empty: a path to saved policy weights '
                     'is required to run this script')
# The map_location callable returns each storage unchanged, so loading does
# not depend on the device the checkpoint was saved from; the policy is moved
# to the target device explicitly below.
pol.load_state_dict(torch.load(args.pol, map_location=lambda storage, loc: storage))

pol.to(device)

# NOTE(review): presumably turns off the data-parallel execution path for
# inference -- confirm against the machina implementation.
pol.dp_run = False

pol.reset()

# Clear the 'start' flag, then block until some other party sets it to 'true'.
# `r` is presumably a Redis client whose get() returns bytes -- TODO confirm.
# NOTE(review): this loop polls as fast as it can with no sleep between
# reads; consider a short delay if CPU usage while waiting matters.
r.set('start', 'false')
while r.get('start').decode('utf-8') != 'true':
    pass

class Process(object):
    def run(self):
        joint_pendulum = float(r.get('joint_info'))

        joint_pendulum_vel = 0
        self.action_input = 0
        first_ob = [np.cos(joint_pendulum), np.sin(joint_pendulum), joint_pendulum_vel,

示例#2

显示文件

文件： run_bc_ppo.py 项目： syundo0730/rl-robo-book-examples

while args.max_epis > total_epi:
    with measure('sample'):
        epis = sampler.sample(pol, max_steps=args.max_steps_per_iter)
    with measure('train'):
        traj = Traj()
        traj.add_epis(epis)

        traj = ef.compute_vs(traj, vf)
        traj = ef.compute_rets(traj, args.gamma)
        traj = ef.compute_advs(traj, args.gamma, args.lam)
        traj = ef.centerize_advs(traj)
        traj = ef.compute_h_masks(traj)
        traj.register_epis()

        if args.data_parallel:
            pol.dp_run = True
            vf.dp_run = True

        if args.ppo_type == 'clip':
            result_dict = ppo_clip.train(traj=traj,
                                         pol=pol,
                                         vf=vf,
                                         clip_param=args.clip_param,
                                         optim_pol=optim_pol,
                                         optim_vf=optim_vf,
                                         epoch=args.epoch_per_iter,
                                         batch_size=args.batch_size if
                                         not args.rnn else args.rnn_batch_size,
                                         max_grad_norm=args.max_grad_norm)
        else:
            result_dict = ppo_kl.train(traj=traj,

示例#3

显示文件

# Training loop: sample episodes with the current policy, run one PPO-clip
# update, then log the results. Runs until total_epi reaches args.max_epis.
while total_epi < args.max_epis:
    with measure('sample'):
        epis = sampler.sample(pol, max_steps=args.max_steps_per_iter)

    with measure('train'):
        # Pack the sampled episodes into a trajectory container.
        traj = Traj()
        traj.add_epis(epis)

        # Post-process the trajectory step by step; each helper returns the
        # trajectory that is fed to the next one.
        traj = ef.compute_vs(traj, vf)
        traj = ef.compute_rets(traj, args.gamma)
        traj = ef.compute_advs(traj, args.gamma, args.lam)
        traj = ef.centerize_advs(traj)
        traj = ef.compute_h_masks(traj)
        traj.register_epis()

        # Flag both models for data-parallel execution when requested.
        if args.data_parallel:
            pol.dp_run = True
            vf.dp_run = True

        # Recurrent runs use their own batch-size setting.
        if args.rnn:
            train_batch_size = args.rnn_batch_size
        else:
            train_batch_size = args.batch_size

        result_dict = ppo_clip.train(
            traj=traj,
            pol=pol,
            vf=vf,
            clip_param=args.clip_param,
            optim_pol=optim_pol,
            optim_vf=optim_vf,
            epoch=args.epoch_per_iter,
            batch_size=train_batch_size,
            max_grad_norm=args.max_grad_norm,
        )

    # Update the running episode/step counters for this iteration.
    total_epi += traj.num_epi
    step = traj.num_step
    total_step += step

    # Per-episode summed reward and its mean over the sampled batch.
    rewards = [np.sum(episode['rews']) for episode in epis]
    mean_rew = np.mean(rewards)

    logger.record_results(
        args.log, result_dict, score_file,
        total_epi, step, total_step,
        rewards,
        plot_title=args.env_name,
    )