def main(): args = PARSER.parse_args() data_path = get_path(args, "record") model_save_path = get_path(args, "tf_vae", create=True) ensure_validation_split(data_path) _n_train, _avg_frames, mean, var = analyse_dataset(data_path) if args.normalize_images: train_data, val_data = create_tf_dataset(data_path, args.z_size, True, mean, var) else: train_data, val_data = create_tf_dataset(data_path, args.z_size) shuffle_size = 5 * 1000 # Roughly 20 full episodes for shuffle windows, more increases RAM usage train_data = train_data.shuffle(shuffle_size, reshuffle_each_iteration=True).batch(args.vae_batch_size).prefetch(2) val_data = val_data.batch(args.vae_batch_size).prefetch(2) current_time = datetime.now().strftime("%Y%m%d-%H%M%S") tensorboard_dir = model_save_path / "tensorboard" / current_time vae = CVAE(args=args) vae.compile(optimizer=vae.optimizer, loss=vae.get_loss()) vae.fit(train_data, validation_data=val_data, epochs=args.vae_num_epoch, callbacks=[ tf.keras.callbacks.TensorBoard(log_dir=str(tensorboard_dir), update_freq=50, histogram_freq=1), LogImage(str(tensorboard_dir), val_data), tf.keras.callbacks.ModelCheckpoint(str(model_save_path / "ckpt-e{epoch:02d}"), verbose=1), ]) vae.save(str(model_save_path))
def main(): args = PARSER.parse_args() data_path = get_path(args, "record") model_save_path = get_path(args, "tf_gqn", create=True) ensure_validation_split(data_path) train_data = load_from_tfrecord(data_path, args.gqn_context_size, args.gqn_batch_size, mode='train') test_data = load_from_tfrecord(data_path, args.gqn_context_size, args.gqn_batch_size, mode='test') current_time = datetime.now().strftime("%Y%m%d-%H%M%S") tensorboard_dir = model_save_path / "tensorboard" / current_time # lr = tf.optimizers.schedules.ExponentialDecay(mu_i, mu_n, mu_f / mu_i, name="lr_schedule" ) lr = tf.optimizers.schedules.PolynomialDecay(mu_i, mu_n, mu_f, name="lr_schedule") sigma = tf.optimizers.schedules.PolynomialDecay(sigma_i, sigma_n, sigma_f, name="sigma_schedule") optimizer = tf.optimizers.Adam(learning_rate=lr) model = GenerativeQueryNetwork(args.gqn_x_dim, args.gqn_r_dim, args.gqn_h_dim, args.gqn_z_dim, args.gqn_l, name="gqn") model.compile(optimizer, sigma, const_sigma=sigma_f) model.fit(train_data, validation_data=test_data, validation_steps=5, steps_per_epoch=S_epoch, epochs=num_epochs, callbacks=[ tf.keras.callbacks.TensorBoard(log_dir=str(tensorboard_dir), update_freq=20, histogram_freq=1), tf.keras.callbacks.ModelCheckpoint( str(model_save_path / "ckpt-e{epoch:02d}"), save_freq=checkpoint_every, verbose=1), LogImages(tensorboard_dir, test_data), ])
def main(): args = PARSER.parse_args() data_dir = get_path(args, "series") train_data_path = data_dir / "series.npz" val_data_path = data_dir / "series_validation.npz" train_data = load_data(train_data_path) validation_data = load_data(val_data_path) initial_z_dir = get_path(args, "tf_initial_z", create=True) create_initial_z(initial_z_dir, train_data) train_dataset, validation_dataset = create_dataset( train_data, validation_data, args.rnn_batch_size, args.rnn_max_seq_len, args.z_size, args.rnn_input_seq_width, args.rnn_predict_done, args.rnn_predict_reward) train_rnn(args, train_dataset, validation_dataset)
def __init__(self, env, silent=False):
    super().__init__(env)

    from vae.vae import CVAE
    from utils import PARSER

    args = PARSER.parse_args(['--config_path', 'configs/carracing.config'])
    model_path_name = "models/tf_vae"

    # Restore pre-trained VAE weights from a NumPy export; the SavedModel
    # loading path is kept for reference.
    self.vae = CVAE(args)
    # self.vae.set_weights(tf.keras.models.load_model(
    #     model_path_name, compile=False).get_weights())
    self.vae.set_weights(np.load("vae_weights.npy", allow_pickle=True))

    self.observation_space = Box(low=float("-inf"), high=float("inf"), shape=(41,))
    self.silent = silent
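The vae_weights.npy file loaded above has to be produced beforehand from the trained VAE. A minimal export sketch, assuming the SavedModel path written by the VAE training script; the actual export step used in this repo may differ:

    import numpy as np
    import tensorflow as tf

    # Dump the trained VAE's weight tensors into a pickled object array so the
    # wrapper can restore them with np.load(..., allow_pickle=True).
    model = tf.keras.models.load_model("models/tf_vae", compile=False)
    np.save("vae_weights.npy", np.array(model.get_weights(), dtype=object))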
        s, r, done, info = env.step(a)
        total_reward += r
        if steps % 200 == 0 or done:
            print("\naction " + str(["{:+0.2f}".format(x) for x in a]))
            print("step {} total_reward {:+0.2f}".format(steps, total_reward))
        steps += 1
        env.render()
        if done or restart:
            break
    env.monitor.close()


if __name__ == '__main__':
    import configargparse
    from utils import PARSER
    args = PARSER.parse_args()
    # true_env = make_env(args, dream_env=False, with_obs=True)
    run_caracing_by_hunman()

# from ppaquette_gym_doom.doom_take_cover import DoomTakeCoverEnv
# from gym.utils import seeding

# class DoomTakeCoverMDNRNN(DoomTakeCoverEnv):
#     def __init__(self, args, render_mode=False, load_model=True, with_obs=False):
#         super(DoomTakeCoverMDNRNN, self).__init__()
#         self.with_obs = with_obs

#         self.no_render = True
#         if render_mode:
#             self.no_render = False
import matplotlib.animation
import matplotlib.pyplot as plt
import wrappers
import gym
import car_racing_environment
import numpy as np
import os
from PIL import Image
import json
import tensorflow as tf
import random
from vae.vae import CVAE
# from env import make_env
from utils import PARSER

args = PARSER.parse_args(['--config_path', 'configs/carracing.config'])

import pygame
pygame.init()
screen = pygame.display.set_mode((600, 300))

frame_skip = 3
seed = 2
env = wrappers.EvaluationWrapper(
    wrappers.VaeCarWrapper(gym.make("CarRacingSoftFS{}-v0".format(frame_skip))),
    seed, evaluate_for=15, report_each=1)

DATA_DIR = "export"
# NOTE: the format string below has no placeholders, so exp_name/env_name are ignored.
model_path_name = "models/tf_vae".format(args.exp_name, args.env_name)
vae = CVAE(args)
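The 600x300 pygame window presumably holds two 300x300 panels side by side, for example the current observation next to its VAE reconstruction. A minimal sketch of drawing one NumPy frame into a panel, assuming an RGB uint8 image; the helper name and panel layout are illustrative, not taken from this script:

    def blit_frame(frame, x_offset=0):
        # frame: (H, W, 3) uint8 RGB image; pygame's surfarray expects (W, H, 3).
        surface = pygame.surfarray.make_surface(np.transpose(frame, (1, 0, 2)))
        surface = pygame.transform.scale(surface, (300, 300))
        screen.blit(surface, (x_offset, 0))
        pygame.display.flip()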
def main(): print("Setting niceness to 19") if "nice" in os.__dict__: os.nice(19) args = PARSER.parse_args() def make_env_with_args(): return make_env(args=args, keep_image=True, wrap_rnn=False) dir_name = get_path(args, "record", create=True) controller = None if args.extract_use_controller: controller = make_controller(args=args) env = make_env_with_args() has_camera_data = isinstance( env.observation_space, gym.spaces.Dict) and "camera" in env.observation_space.spaces format_str = "[{success:s}] {done:s} after {frames:4d} frames, reward {reward:6.1f} " \ "(Total: {total_frames:7d} frames, {successful_trials:3d}/{total_trials:3d} successful trials)" total_frames = 0 successful_trials = 0 for trial in range(args.max_trials): try: seed = random.randint(0, 2**31 - 1) filename = dir_name / (str(seed) + ".npz") np.random.seed(seed) env.seed(seed) recording_image = [] recording_camera = [] recording_action = [] recording_reward = [] recording_done = [] # random policy if args.extract_use_controller: controller.init_random_model_params(stddev=np.random.rand() * 0.01) repeat_action = np.random.randint(1, 11) action = [0] * args.a_width total_reward = 0 obs = env.reset() frame = 0 ended_early = False for frame in range(args.max_frames): # Save current observation recording_image.append(obs["image"]) if has_camera_data: recording_camera.append(obs["camera"]) # Get next action (random) if not args.extract_repeat_actions or frame % repeat_action == 0: if args.extract_use_controller: action = controller.get_action(obs["features"]) else: action = np.random.rand(args.a_width) * 2.0 - 1.0 if args.extract_repeat_actions: repeat_action = np.random.randint(1, 11) # Save action recording_action.append(action) # Perform action obs, reward, done, _info = env.step(action) total_reward += reward # Save reward and done flag recording_reward.append(reward) recording_done.append(done) # Stop when done if done: ended_early = True break total_frames += (frame + 1) enough_frames = len(recording_image) >= args.min_frames # Save episode to disk (if it has required minimum length) if enough_frames: successful_trials += 1 recording_image = np.array(recording_image, dtype=np.uint8) recording_camera = np.array(recording_camera, dtype=np.float16) recording_action = np.array(recording_action, dtype=np.float16) recording_reward = np.array(recording_reward, dtype=np.float16) recording_done = np.array(recording_done, dtype=np.bool) data = { "image": recording_image, "action": recording_action, "reward": recording_reward, "done": recording_done } if has_camera_data: data["camera"] = recording_camera np.savez_compressed(str(filename), **data) print( format_str.format(success="O" if enough_frames else " ", done="Done" if ended_early else "Stop", frames=frame + 1, reward=total_reward, total_frames=total_frames, successful_trials=successful_trials, total_trials=trial + 1)) except gym.error.Error as e: print("Gym raised an error: " + str(e)) env.close() env = make_env_with_args() env.close()