def validation(env):
    """Play one episode with the saved Sonic model, printing each step's reward.

    The environment built by ``make_env(env)`` is wrapped in ``Monitor``
    writing to ``./video`` (presumably gym's recording wrapper -- confirm
    against the file's imports), the weights in ``sonic_model_final.h5`` are
    loaded, and the agent's policy is followed until the environment
    reports ``done``.

    Args:
        env: Value forwarded unchanged to ``make_env``.
    """
    env = make_env(env)
    env = Monitor(env, './video', force=True)
    # NOTE(review): the third positional argument (True) looks like an
    # evaluation/test-mode flag -- confirm against SonicAgent.__init__.
    sonic = SonicAgent(env, TIMESTEPS_PER_EPISODE * EPISODES, True)
    sonic.load_model('sonic_model_final.h5')

    try:
        obs = env.reset()
        while True:
            action = sonic.policy(obs)
            next_obs, reward, done, info = env.step(action)
            print("Para la accion #{} la recompensa es {}".format(action, reward))
            env.render()
            obs = next_obs
            if done:
                # Leave the loop once the episode ends. The previous code
                # assigned the result of env.close() to obs and kept
                # looping, so the next iteration stepped a closed
                # environment with that value as the observation.
                break
    finally:
        # Close exactly once, including when the run is interrupted.
        env.close()
def training(env):
    """Train the Sonic agent for ``EPISODES`` episodes and save the final model.

    If ``sonic_model_final.h5`` already exists its weights are loaded before
    training starts. Every transition is stored in the agent's memory, and a
    replay/learn step runs whenever the memory holds more entries than
    ``sonic.batch_size``. After each episode the target model is updated;
    on every 50th episode index (starting at 0) a checkpoint is written
    under ``models/``.

    Args:
        env: Value forwarded unchanged to ``make_env``.
    """
    env = make_env(env)
    sonic = SonicAgent(env, TIMESTEPS_PER_EPISODE * EPISODES)

    # Resume from an earlier run when a final model is already on disk.
    checkpoint = Path('sonic_model_final.h5')
    if checkpoint.is_file():
        sonic.load_model('sonic_model_final.h5')

    episode_rewards = []
    obs = env.reset()
    for episode in range(EPISODES):
        print("Empieza Episodio #{}".format(episode + 1))
        episode_reward = 0.0
        finished = False
        while not finished:
            action = sonic.policy(obs)
            next_obs, reward, finished, info = env.step(action)
            sonic.save_memory(obs, action, reward, next_obs, finished)
            # Learn only once the memory exceeds one batch.
            if len(sonic.memory) > sonic.batch_size:
                sonic.replay_and_learn()
            episode_reward += reward
            obs = next_obs

        # Periodic checkpoint, named after the zero-based episode index.
        if episode % 50 == 0:
            sonic.save_model('models/' + str(episode) + '_sonic_model.h5')
        episode_rewards.append(episode_reward)
        sonic.update_target_model()
        print("Episodio #{} finalizado con recompensa {}".format(episode + 1, episode_reward))
        obs = env.reset()

    env.close()
    sonic.save_model('sonic_model_final.h5')
if __name__ == "__main__":
    # Script entry point: train an A2C agent on four vectorised
    # environments, then run its test routine on a single environment.
    import tensorflow as tf

    from a2c.A2C import A2C
    from utils.utils import make_env
    from env.subproc_vec_env import SubprocVecEnv

    # One make_env(i) result per index 0..3 is handed to SubprocVecEnv.
    envs = SubprocVecEnv([make_env(i) for i in range(4)])
    sess = tf.Session()
    try:
        a2c = A2C(
            sess,
            envs,
            num_iterations=1000,
            save_step=1000,
            checkpoint_dir="./a2c/checkpoints/",
        )
        a2c.train()

        # Testing uses a fresh single-environment vector built from index 0.
        env = SubprocVecEnv([make_env(0)])
        a2c.test(env, 100000)
    finally:
        # Release the session even if training or testing raises;
        # previously an exception skipped sess.close().
        # NOTE(review): the SubprocVecEnv instances are never closed here --
        # confirm whether the project's class exposes close().
        sess.close()
n_timesteps = args.n_timesteps else: n_timesteps = int(hyperparams['n_timesteps']) del hyperparams['n_timesteps'] normalize = False normalize_kwargs = {} if 'normalize' in hyperparams.keys(): normalize = hyperparams['normalize'] if isinstance(normalize, str): normalize_kwargs = eval(normalize) normalize = True del hyperparams['normalize'] if not args.teleop: env = DummyVecEnv([make_env(args.seed, vae=vae, teleop=args.teleop)]) else: env = make_env(args.seed, vae=vae, teleop=args.teleop, n_stack=hyperparams.get('frame_stack', 1))() if normalize: if hyperparams.get('normalize', False) and args.algo in ['ddpg']: print("WARNING: normalization not supported yet for DDPG") else: print("Normalizing input and return") env = VecNormalize(env, **normalize_kwargs) # Optional Frame-stacking n_stack = 1
for key in SIM_PARAMS: saved_hyperparams[key] = eval(key) print(saved_hyperparams) ######################################################### ################# number of timesteps ################### # hyperparamsからn_timestepsを読み込む # parserで指定されたら上書き if args.n_timesteps > 0: n_timesteps = args.n_timesteps else: n_timesteps = int(hyperparams["n_timesteps"]) del hyperparams["n_timesteps"] ######################################################### env = DummyVecEnv([make_env(args.seed, vae=vae)]) ##################### normalize ######################### # If normalize exsists in hyperparams, add in normalize_kwargs # and set normalize to True normalize = False normalize_kwargs = {} if "normalize" in hyperparams.keys(): normalize = hyperparams["normalize"] if isinstance(normalize, str): normalize_kwargs = eval(normalize) normalize = True del hyperparams["normalize"] # Normalize the input image if normalize:
hyperparams[key] = linear_schedule(initial_value) elif isinstance(hyperparams[key], float): hyperparams[key] = constfn(hyperparams[key]) else: raise ValueError('Invalid valid for {}: {}'.format(key, hyperparams[key])) if args.n_timesteps > 0: n_timesteps = args.n_timesteps else: n_timesteps = int(hyperparams['n_timesteps']) del hyperparams['n_timesteps'] with make_carla_client('localhost', 2000) as client: print("CarlaClient connected") env = DummyVecEnv([make_env(client, args.seed, vae=vae)]) # Optional Frame-stacking n_stack = 1 if hyperparams.get('frame_stack', False): n_stack = hyperparams['frame_stack'] env = VecFrameStack(env, n_stack) print("Stacking {} frames".format(n_stack)) del hyperparams['frame_stack'] # Parse noise string for DDPG if args.algo == 'ddpg' and hyperparams.get('noise_type') is not None: noise_type = hyperparams['noise_type'].strip() noise_std = hyperparams['noise_std'] n_actions = env.action_space.shape[0] if 'adaptive-param' in noise_type:
del hyperparams['n_timesteps'] normalize = False normalize_kwargs = {} if 'normalize' in hyperparams.keys(): normalize = hyperparams['normalize'] if isinstance(normalize, str): normalize_kwargs = eval(normalize) normalize = True del hyperparams['normalize'] if not args.teleop and not args.local_control: env = DummyVecEnv([ make_env( args.seed, vae=vae, teleop=args.teleop, local_control=args.local_control, ) ]) else: env = make_env(args.seed, vae=vae, teleop=args.teleop, local_control=args.local_control, n_stack=hyperparams.get('frame_stack', 1))() if normalize: if hyperparams.get('normalize', False) and args.algo in ['ddpg']: print("WARNING: normalization not supported yet for DDPG") else: print("Normalizing input and return")
del hyperparams['n_timesteps'] normalize = False normalize_kwargs = {} if 'normalize' in hyperparams.keys(): normalize = hyperparams['normalize'] if isinstance(normalize, str): normalize_kwargs = eval(normalize) normalize = True del hyperparams['normalize'] if 'policy_kwargs' in hyperparams.keys(): hyperparams['policy_kwargs'] = eval(hyperparams['policy_kwargs']) if not args.teleop: env = DummyVecEnv([make_env(args.level, args.seed, vae=vae, teleop=args.teleop, obs_res=(IMAGE_WIDTH, IMAGE_HEIGHT))]) # env_fn = make_env(args.level, args.seed, vae=vae, teleop=args.teleop, obs_res=(IMAGE_WIDTH, IMAGE_HEIGHT)) # env = env_fn() else: env = make_env(args.level, args.seed, vae=vae, teleop=args.teleop, n_stack=hyperparams.get('frame_stack', 1), obs_res=(IMAGE_WIDTH, IMAGE_HEIGHT))() if normalize: if hyperparams.get('normalize', False) and args.algo in ['ddpg']: print("WARNING: normalization not supported yet for DDPG") else: print("Normalizing input and return") env = VecNormalize(env, **normalize_kwargs) # Optional Frame-stacking n_stack = 1