def main(args):
    """Evaluate sampled robot designs from a checkpoint and save the results.

    Reads the training hyperparameters from ``<logdir>/hyps.json``, rebuilds
    the environment and model from them, loads the checkpoint ``args.ckpt``,
    and calls ``eval_robot`` on up to ``args.samples`` robots drawn from
    ``model.sampler``. The collected results are written to
    ``<logdir>/eval/<t>.json`` under the keys ``'l'`` and ``'r'``, where ``t``
    is whatever ``U.Experiment(...).load`` returns.

    Args:
        args: Run-time options. This function reads ``logdir``, ``ckpt``,
            ``samples`` and ``stochastic``; the whole object is also passed
            on to ``eval_robot``.
    """
    U.reset()
    with open(os.path.join(args.logdir, 'hyps.json'), 'r') as f:
        hyps = json.load(f)
    # Expose the saved hyperparameters with attribute access.
    train_args = namedtuple('Args', hyps.keys())(**hyps)

    env_fn = make_env_fn(train_args)
    model_fn = make_model_fn(train_args)
    env = env_fn(0)
    model = model_fn(env)
    model.build('model', 1, 1)
    model.sampler.build('model', 1, 1)

    # Use the session as a context manager so it is exited even when
    # loading, evaluation or writing the results raises.
    with U.make_session():
        U.initialize()
        t = U.Experiment(args.logdir).load(args.ckpt)

        ls = []
        rs = []
        for _ in range(args.samples):
            env.update_robot(model.sampler.sample(args.stochastic)[0])
            l_val, r_val = eval_robot(args, env, model)
            ls.append(l_val)
            rs.append(r_val)
            if not args.stochastic:
                # Non-stochastic sampling is requested, so a single
                # evaluation is performed and the loop stops here.
                break

        os.makedirs(os.path.join(args.logdir, 'eval'), exist_ok=True)
        out_path = os.path.join(args.logdir, 'eval', '{}.json'.format(t))
        with open(out_path, 'w') as f:
            json.dump({'l': ls, 'r': rs}, f)
def main(run_args):
    """Train using the hyperparameters stored in the log directory.

    Reads ``<logdir>/hyps.json``, builds the environment and model factories
    from it, constructs an ``Algorithm`` and runs ``train``. ``close`` is
    always called on the algorithm afterwards, including when training
    raises or is interrupted.

    Args:
        run_args: Run-time options. This function reads ``logdir``,
            ``maxseconds`` and ``save_freq``; every other setting comes from
            ``hyps.json``.
    """
    with open(os.path.join(run_args.logdir, 'hyps.json'), 'r') as f:
        hyps = json.load(f)
    # Expose the saved hyperparameters with attribute access.
    args = namedtuple('Args', hyps.keys())(**hyps)

    env_fn = make_env_fn(args)
    model_fn = make_model_fn(args)
    # NOTE(review): the reason for this one-second pause is not evident from
    # this block; it is preserved unchanged.
    time.sleep(1)

    alg = Algorithm(
        run_args.logdir,
        env_fn,
        model_fn,
        args.nenv,
        args.rollout_length,
        args.batchsize,
        epochs_per_iter=args.epochs,
        lr=args.lr,
        momentum=args.momentum,
        ent_coef=args.entcoeff,
        gamma=args.gamma,
        lambda_=args.lmbda,
        clip_norm=args.grad_clip_norm,
        clip_param=args.ppo_clip_param,
        robot_lr=args.robot_lr,
        robot_momentum=args.robot_momentum,
        fixed_robot=args.fixed_robot,
        steps_before_robot_update=args.steps_before_robot_update,
        steps_after_robot_update=args.steps_after_robot_update,
        chop_freq=args.chop_freq,
        tmax=args.maxtimesteps,
    )
    try:
        alg.train(args.maxtimesteps, run_args.maxseconds, run_args.save_freq)
    finally:
        # Release whatever the algorithm holds even if training fails or is
        # interrupted (e.g. Ctrl-C during a long run).
        alg.close()
def main(args):
    """Roll out the trained policy and render it live or save it as a video.

    Reads the training hyperparameters from ``<logdir>/hyps.json``, rebuilds
    the environment and model, loads the checkpoint ``args.ckpt`` and sets
    the robot to the non-stochastic sample of ``model.sampler``. It then runs
    ``args.nepisodes`` episodes using ``model.actor.mode`` for actions.

    When ``args.save`` is false, ``env.render()`` is called once before the
    episodes. When it is true, every step's ``rgb_array`` frame is written to
    ``./video_tmp``, the frames are encoded with ffmpeg, and the result is
    moved to ``<logdir>/video/<t>.mp4`` where ``t`` is whatever
    ``U.Experiment(...).load`` returns. The temporary frame directory is
    removed afterwards, whether or not the run succeeded.

    Args:
        args: Run-time options. This function reads ``logdir``, ``ckpt``,
            ``save`` and ``nepisodes``.

    Raises:
        RuntimeError: If ``args.save`` is set and ffmpeg exits with a
            non-zero status.
    """
    # Function-scope import: only needed for moving/removing saved files.
    import shutil

    U.reset()
    with open(os.path.join(args.logdir, 'hyps.json'), 'r') as f:
        hyps = json.load(f)
    # Expose the saved hyperparameters with attribute access.
    train_args = namedtuple('Args', hyps.keys())(**hyps)

    env_fn = make_env_fn(train_args)
    model_fn = make_model_fn(train_args)
    env = env_fn(0)
    env.unwrapped.set_render_ground(True)
    model = model_fn(env)
    model.build('model', 1, 1)
    model.sampler.build('model', 1, 1)

    outdir = './video_tmp'

    # Use the session as a context manager so it is always exited; the
    # previous code entered it and never left it.
    with U.make_session():
        U.initialize()
        t = U.Experiment(args.logdir).load(args.ckpt)

        # load mode of design distribution
        env.update_robot(model.sampler.sample(stochastic=False)[0])

        if not args.save:
            env.reset()
            env.render()
        else:
            os.makedirs(outdir, exist_ok=True)
            os.makedirs(os.path.join(args.logdir, 'video'), exist_ok=True)

        frame_idx = 0
        try:
            for _episode in range(args.nepisodes):
                done = False
                ob = env.reset()
                while not done:
                    if args.save:
                        # Grab the frame for the state the action is
                        # computed from, before stepping.
                        rgb = env.render('rgb_array')
                    ac = model.actor.mode(ob[None])[0]
                    ob, _rew, done, _ = env.step(ac)
                    if args.save:
                        frame_path = os.path.join(
                            outdir, '{:05d}.png'.format(frame_idx))
                        imwrite(frame_path, rgb)
                        frame_idx += 1

            if args.save:
                encoded = os.path.join(outdir, 'out.mp4')
                status = sp.call([
                    'ffmpeg', '-r', '60', '-f', 'image2',
                    '-i', os.path.join(outdir, '%05d.png'),
                    '-vcodec', 'libx264', '-pix_fmt', 'yuv420p',
                    encoded,
                ])
                if status != 0:
                    # Do not silently discard the frames' result: report
                    # the failed encode instead of moving a missing file.
                    raise RuntimeError(
                        'ffmpeg exited with status {}'.format(status))
                outfile = str(t) + '.mp4'
                shutil.move(
                    encoded, os.path.join(args.logdir, 'video', outfile))
        finally:
            if args.save:
                # Same effect as `rm -rf`, without spawning a process.
                shutil.rmtree(outdir, ignore_errors=True)