def main():
    """Load a trained YamaX walking policy (zero training steps) and replay it forever."""
    parser = common_arg_parser()
    cli_args, unknown = parser.parse_known_args()
    extra = parse_cmdline_kwargs(unknown)

    # Force "play" mode on the YamaX env: train() with 0 timesteps just
    # builds the model (and loads weights via extra_args, e.g. load_path).
    cli_args.num_timesteps = 0
    cli_args.play = True
    cli_args.env = 'YamaXRealForwardWalk-v0'

    model, env = train(cli_args, extra)
    env.close()

    # Re-create a fresh environment for the rendered rollout.
    env = build_env(cli_args)
    obs = env.reset()

    def make_initial_state(nlstm=128, **kwargs):
        # Zeroed recurrent (LSTM) state plus a single "not done" mask.
        return np.zeros((cli_args.num_env or 1, 2 * nlstm)), np.zeros((1))

    state, dones = make_initial_state(**extra)

    # Replay loop: render each step, reset whenever an episode terminates.
    while True:
        acts, _, state, _ = model.step(obs, S=state, M=dones)
        obs, _, done, _ = env.step(acts)
        env.render()
        done = done.any() if isinstance(done, np.ndarray) else done
        if done:
            obs = env.reset()

    env.close()  # unreachable (loop above never breaks); kept from original
def load_model(path, render=False):
    """Build a model/env pair via train() and load saved weights from *path*.

    Args:
        path: checkpoint path passed to ``model.load``.
        render: if True, play one episode to the screen, printing the
            accumulated score, and close the environment afterwards.

    Returns:
        (model, env) tuple as produced by ``train``.

    NOTE(review): this function reads module-level ``args`` and
    ``extra_args`` that are not parameters -- it raises NameError unless
    they are defined as globals before the call; consider passing them in
    explicitly.
    """
    model, env = train(args, extra_args)
    model.load(path)
    if render:
        # play
        obs = env.reset()
        score = 0
        done = False
        # Idiomatic truthiness test instead of the original `done == False`.
        while not done:
            # Get the action.
            # NOTE(review): 3-value unpack of model.step here vs 4-value
            # elsewhere in this file -- confirm which API this model exposes.
            actions, values, _ = model.step(obs)
            # Take actions in env and look at the results.
            obs, rewards, done, info = env.step(actions)
            score += rewards
            env.render()
        print("Score ", score)
        env.close()
    return model, env
def setUp(env, alg, load_path):
    """Prepare an (env, model) pair for testing by loading a saved policy."""
    config = Bunch({
        'env': env,
        'alg': alg,
        'num_timesteps': 0,
        'seed': None,
        'num_env': 1,
        'network': None,
    })
    overrides = {'load_path': load_path}

    # train() with 0 timesteps only constructs the model and loads weights.
    model, training_env = train(config, overrides)
    training_env.close()

    # Hand the caller a freshly built environment instead of the training one.
    fresh_env = build_env(config, overrides)
    return fresh_env, model
def nm_main(env, env_type, seed, alg, num_timesteps, network, gamestate,
            num_env, reward_scale, save_path, save_video_interval,
            save_video_length, play, log_path, env_args, alg_args):
    """Copy run options onto a baselines arg object, set up logging, and train."""
    bl_args = bl_arg_class()

    # Mirror every keyword onto the baselines argument object.
    for attr, value in (
        ('env', env),
        ('env_type', env_type),
        ('seed', seed),
        ('alg', alg),
        ('num_timesteps', num_timesteps),
        ('network', network),
        ('gamestate', gamestate),
        ('num_env', num_env),
        ('reward_scale', reward_scale),
        ('save_path', save_path),
        ('save_video_interval', save_video_interval),
        ('save_video_length', save_video_length),
        ('log_path', log_path),
        ('play', play),
        ('env_args', env_args),
    ):
        setattr(bl_args, attr, value)

    # Rank 0 (or non-MPI runs) gets full logging; other ranks log nothing.
    if MPI is None or MPI.COMM_WORLD.Get_rank() == 0:
        rank = 0
        blr.configure_logger(bl_args.log_path)
    else:
        rank = MPI.COMM_WORLD.Get_rank()
        blr.configure_logger(bl_args.log_path, format_strs=[])

    model, env = blr.train(bl_args, alg_args)
    env.close()
    print('\nDie allerallerallerletzte Zeile...\n')
def main():
    """Replay a trained model: render live when --play, else record one video."""
    parser = common_arg_parser()
    run_args, unknown = parser.parse_known_args()
    run_args.num_env = 1
    extra = parse_cmdline_kwargs(unknown)

    # Build the model (and load weights via extra kwargs), then discard the
    # training environment in favour of a fresh one.
    model, env = train(run_args, extra)
    env.close()

    logger.log("Running trained model")
    env = build_env(run_args)

    if not run_args.play:
        # Not playing to screen: wrap the env in a Monitor to record video.
        ts = time.gmtime()
        directory = time.strftime("./render/%s", ts)
        logger.log("Output video to directory:", directory)
        env.envs = [gym.wrappers.Monitor(env.envs[0], directory=directory)]

    obs = env.reset()

    def make_initial_state(nlstm=128, **kwargs):
        # Zeroed recurrent (LSTM) state plus a single "not done" mask.
        return np.zeros((run_args.num_env, 2 * nlstm)), np.zeros((1))

    state, dones = make_initial_state(**extra)

    episodes_left = 1
    while True:
        acts, _, state, _ = model.step(obs, S=state, M=dones)
        obs, _, done, _ = env.step(acts)
        if run_args.play:
            env.render()
        done = done.any() if isinstance(done, np.ndarray) else done
        if done:
            episodes_left -= 1
            if episodes_left <= 0:
                break
            obs = env.reset()
    env.close()
# Stand-alone evaluation script: build/load a trained policy and prepare to
# test it against a fixed image dataset.
import numpy as np
import os
import shutil
from argparse import Namespace
from baselines.run import build_env, train, parse_cmdline_kwargs
from baselines.a2c.a2c import Model
from baselines.common.cmd_util import common_arg_parser

print("Running trained model")

# Parse the known baselines CLI options; anything unrecognised is forwarded
# as algorithm keyword arguments.
arg_parser = common_arg_parser()
args, unknown_args = arg_parser.parse_known_args()
extra_args = parse_cmdline_kwargs(unknown_args)
model, env = train(args, extra_args)

# Load the data to test on
data = np.load('test_images.npy')
tests = len(data)

# Build environment
env = build_env(args)
# NOTE(review): the double .env unwrapping assumes a fixed wrapper stack
# around the inner env -- verify against the actual env construction.
env.envs[0].env.env.phase = 'test'

# Load model
# presumably the checkpoint file is named "<env><alg>" -- confirm against
# wherever the model is saved.
model.load(args.env + args.alg)

# Test model on all images in dataset
path = "/tmp/movements"
norm_steps = []   # per-test step counts (filled by the loop below this view)
fail = 0          # failure counter for the test loop
# for j in range(100):