Пример #1
0
    def __init__(self, load_model=True):
        """Set up the VAE + MDN-RNN controller for the VisualPushBlock env.

        Args:
            load_model: if True, restore pretrained VAE/RNN weights from JSON.
        """
        self.env_name = './VisualPushBlock_withBlock_z_info.x86_64'  #'./VisualPushBlock.x86_64'
        # Single-sample, inference-only VAE; reuse=True reuses existing TF variables.
        self.vae = ConvVAE(batch_size=1,
                           gpu_mode=False,
                           is_training=False,
                           reuse=True)
        self.rnn = MDNRNN(hps_sample, gpu_mode=False, reuse=True)

        if load_model:
            self.vae.load_json('vae/vae.json')
            self.rnn.load_json('rnn/rnn.json')

        # Recurrent state of the MDN-RNN, advanced as the episode unrolls.
        self.state = rnn_init_state(self.rnn)
        self.rnn_mode = True

        # Controller input width depends on EXP_MODE (z only vs. z + hidden state).
        self.input_size = rnn_output_size(EXP_MODE)
        self.z_size = z_size

        if EXP_MODE == MODE_Z_HIDDEN:  # one hidden layer ###CHANGE is made here
            self.hidden_size = 40
            # Random init; presumably replaced later by an evolved/loaded
            # parameter vector — TODO confirm against set_model_params caller.
            self.weight_hidden = np.random.randn(self.input_size,
                                                 self.hidden_size)
            self.bias_hidden = np.random.randn(self.hidden_size)
            self.weight_output = np.random.randn(self.hidden_size, ACTION_SIZE)
            self.bias_output = np.random.randn(ACTION_SIZE)
            # (input + 1 bias) * hidden, plus hidden->action weights and biases.
            self.param_count = ((self.input_size + 1) * self.hidden_size) + (
                self.hidden_size * ACTION_SIZE + ACTION_SIZE)
        else:
            # Single linear layer from controller input to ACTION_SIZE outputs.
            self.weight = np.random.randn(self.input_size, ACTION_SIZE)
            self.bias = np.random.randn(ACTION_SIZE)
            self.param_count = (self.input_size) * ACTION_SIZE + ACTION_SIZE

        self.render_mode = False
Пример #2
0
    def __init__(self, args, render_mode=False, load_model=True):
        """Latent-space environment: observations are VAE z + RNN state.

        Args:
            args: experiment config providing `exp_name` and `env_name`.
            render_mode: stored rendering flag.
            load_model: if True, restore pretrained Keras VAE/RNN weights.
        """
        self.render_mode = render_mode
        model_path_name = 'results/{}/{}'.format(args.exp_name, args.env_name)
        # Load saved (mu, logvar) samples describing initial latent states.
        with open(os.path.join(model_path_name, 'tf_initial_z/initial_z.json'),
                  'r') as f:
            [initial_mu, initial_logvar] = json.load(f)

        # Pair each mu_i with its logvar_i: one [mu_i, logvar_i] row per sample.
        self.initial_mu_logvar = np.array(
            [list(elem) for elem in zip(initial_mu, initial_logvar)])

        self.vae = CVAE(args)
        self.rnn = MDNRNN(args)

        if load_model:
            # Copy weights out of the saved Keras models into fresh instances.
            self.vae.set_weights(
                tf.keras.models.load_model('results/{}/{}/tf_vae'.format(
                    args.exp_name, args.env_name),
                                           compile=False).get_weights())
            self.rnn.set_weights(
                tf.keras.models.load_model('results/{}/{}/tf_rnn'.format(
                    args.exp_name, args.env_name),
                                           compile=False).get_weights())

        # future versions of OpenAI gym needs a dtype=np.float32 in the next line:
        self.action_space = Box(low=-1.0, high=1.0, shape=())
        # Observation: latent z concatenated with RNN state (h, and c if state_space==2).
        obs_size = self.rnn.args.z_size + self.rnn.args.rnn_size * self.rnn.args.state_space
        # future versions of OpenAI gym needs a dtype=np.float32 in the next line:
        self.observation_space = Box(low=-50., high=50., shape=(obs_size, ))

        self.rnn_states = None
        self.o = None

        self.seed()
        self.reset()
Пример #3
0
    def __init__(self, arglist):
        """Controller for `arglist.game`: ConvVAE encoder + MDN-RNN + linear policy.

        Args:
            arglist: options carrying `game`, `vae_file` and `rnn_file` paths.
        """
        self.env_name = arglist.game
        self.vae = ConvVAE(batch_size=1,
                           gpu_mode=False,
                           is_training=False,
                           reuse=True)
        self.vae.load_json(arglist.vae_file)
        self.rnn = MDNRNN(hps_sample, gpu_mode=False, reuse=True)
        self.rnn.load_json(arglist.rnn_file)
        # Recurrent state of the MDN-RNN, advanced as the episode unrolls.
        self.state = rnn_init_state(self.rnn)
        self.rnn_mode = True

        self.input_size = rnn_output_size(EXP_MODE)
        self.z_size = 32

        if EXP_MODE == MODE_Z_HIDDEN:  # one hidden layer
            self.hidden_size = 40
            # Random init; presumably overwritten by a loaded parameter vector
            # — TODO confirm against the external parameter loader.
            self.weight_hidden = np.random.randn(self.input_size,
                                                 self.hidden_size)
            self.bias_hidden = np.random.randn(self.hidden_size)
            self.weight_output = np.random.randn(self.hidden_size, 2)
            self.bias_output = np.random.randn(2)
            # (input + 1 bias) * hidden, plus hidden->2 actions and 2 biases.
            self.param_count = ((self.input_size + 1) *
                                self.hidden_size) + (self.hidden_size * 2 + 2)
        else:
            # Single linear layer with 2 action outputs.
            self.weight = np.random.randn(self.input_size, 2)
            self.bias = np.random.randn(2)
            self.param_count = (self.input_size) * 2 + 2

        self.render_mode = False
Пример #4
0
    def __init__(self,
                 args,
                 load_model=True,
                 full_episode=False,
                 with_obs=False):
        """CarRacing wrapper whose observations are VAE latents + RNN state.

        Args:
            args: experiment config (env_name, z_size, rnn_size, state_space).
            load_model: if True, restore pretrained VAE/RNN SavedModel weights.
            full_episode: forwarded to the base wrapper.
            with_obs: whether or not to return the frame with the encodings.
        """
        super(CarRacingMDNRNN, self).__init__(full_episode=full_episode)
        self.with_obs = with_obs  # whether or not to return the frame with the encodings
        self.vae = CVAE(args)
        self.rnn = MDNRNN(args)

        if load_model:
            # Restore pretrained weights from the SavedModel variable lists.
            self.vae.set_weights([
                param_i.numpy() for param_i in tf.saved_model.load(
                    'results/{}/tf_vae'.format(args.env_name)).variables
            ])
            self.rnn.set_weights([
                param_i.numpy() for param_i in tf.saved_model.load(
                    'results/{}/tf_rnn'.format(args.env_name)).variables
            ])
        self.rnn_states = rnn_init_state(self.rnn)

        self.full_episode = False
        # BUGFIX: `shape` must be a tuple — the original passed a bare int
        # (missing trailing comma), which modern gym rejects.  Also,
        # np.NINF/np.Inf were removed in NumPy 2.0; use -np.inf/np.inf.
        self.observation_space = Box(low=-np.inf,
                                     high=np.inf,
                                     shape=(args.z_size +
                                            args.rnn_size * args.state_space,))
Пример #5
0
    def __init__(self,
                 args,
                 load_model=True,
                 full_episode=False,
                 with_obs=False):
        """CarRacing wrapper whose observations are VAE latents + RNN state.

        Args:
            args: experiment config providing `exp_name` and `env_name`.
            load_model: if True, restore pretrained Keras VAE/RNN weights.
            full_episode: forwarded to the base wrapper.
            with_obs: whether or not to return the frame with the encodings.
        """
        super(CarRacingMDNRNN, self).__init__(full_episode=full_episode)
        self.with_obs = with_obs  # whether or not to return the frame with the encodings
        self.vae = CVAE(args)
        self.rnn = MDNRNN(args)

        if load_model:
            # Copy weights out of the saved Keras models into fresh instances.
            self.vae.set_weights(
                tf.keras.models.load_model('results/{}/{}/tf_vae'.format(
                    args.exp_name, args.env_name),
                                           compile=False).get_weights())
            self.rnn.set_weights(
                tf.keras.models.load_model('results/{}/{}/tf_rnn'.format(
                    args.exp_name, args.env_name),
                                           compile=False).get_weights())

        self.rnn_states = rnn_init_state(self.rnn)

        self.full_episode = False
        # BUGFIX: `shape` must be a 1-tuple — the original passed the bare int
        # (32 + 256).  np.NINF/np.Inf were removed in NumPy 2.0.
        self.observation_space = Box(low=-np.inf,
                                     high=np.inf,
                                     shape=(32 + 256,))
Пример #6
0
  def __init__(self, load_model=True):
    """Set up the VAE + MDN-RNN controller for the PushBlock env.

    Args:
        load_model: if True, restore pretrained VAE/RNN weights from JSON.
    """
    # For Mac
    # self.env_name = "/Users/intuinno/codegit/pushBlock/app/mac/VisualPushBlockContinuous"
    # For linux
    self.env_name = "/home/intuinno/codegit/pushblock/app/linux/pushblock.x86_64"
    # Single-sample, inference-only VAE; reuse=True reuses existing TF variables.
    self.vae = ConvVAE(batch_size=1, gpu_mode=False, is_training=False, reuse=True)

    self.rnn = MDNRNN(hps_sample, gpu_mode=False, reuse=True)

    if load_model:
      self.vae.load_json('vae/vae.json')
      self.rnn.load_json('rnn/rnn.json')

    # Recurrent state of the MDN-RNN, advanced as the episode unrolls.
    self.state = rnn_init_state(self.rnn)
    self.rnn_mode = True

    # Controller input width depends on EXP_MODE (z only vs. z + hidden state).
    self.input_size = rnn_output_size(EXP_MODE)
    self.z_size = 32


    if EXP_MODE == MODE_Z_HIDDEN: # one hidden layer
      self.hidden_size = 40
      # Random init; presumably replaced by a loaded/evolved parameter vector.
      self.weight_hidden = np.random.randn(self.input_size, self.hidden_size)
      self.bias_hidden = np.random.randn(self.hidden_size)
      self.weight_output = np.random.randn(self.hidden_size, 3)
      self.bias_output = np.random.randn(3)
      # (input + 1 bias) * hidden, plus hidden->3 actions and 3 biases.
      self.param_count = ((self.input_size+1)*self.hidden_size) + (self.hidden_size*3+3)
    else:
      # Single linear layer with 3 action outputs.
      self.weight = np.random.randn(self.input_size, 3)
      self.bias = np.random.randn(3)
      self.param_count = (self.input_size)*3+3

    self.render_mode = False
Пример #7
0
  def __init__(self):
    """Set up the VAE + MDN-RNN controller for the carracing env."""
    self.env_name = "carracing"
    # Single-sample, inference-only VAE; reuse=True reuses existing TF variables.
    self.vae = ConvVAE(batch_size=1, gpu_mode=False, is_training=False, reuse=True)
    self.vae.load_json('vae/vae.json')
    self.rnn = MDNRNN(hps_sample, gpu_mode=False, reuse=True)
    self.rnn.load_json('rnn/rnn.json')
    # Recurrent state of the MDN-RNN, advanced as the episode unrolls.
    self.state = rnn_init_state(self.rnn)
    self.rnn_mode = True

    # Controller input width depends on EXP_MODE (z only vs. z + hidden state).
    self.input_size = rnn_output_size(EXP_MODE)
    self.z_size = 32

    if EXP_MODE == MODE_Z_HIDDEN: # one hidden layer
      self.hidden_size = 40
      # Random init; presumably replaced by a loaded/evolved parameter vector.
      self.weight_hidden = np.random.randn(self.input_size, self.hidden_size)
      self.bias_hidden = np.random.randn(self.hidden_size)
      self.weight_output = np.random.randn(self.hidden_size, 3)
      self.bias_output = np.random.randn(3)
      # (input + 1 bias) * hidden, plus hidden->3 actions and 3 biases.
      self.param_count = ((self.input_size+1)*self.hidden_size) + (self.hidden_size*3+3)
    else:
      # Single linear layer with 3 action outputs.
      self.weight = np.random.randn(self.input_size, 3)
      self.bias = np.random.randn(3)
      self.param_count = (self.input_size)*3+3

    self.render_mode = False
Пример #8
0
    def __init__(self, arglist, action_space, scope, load_model=True):
        """Multi-agent world-model controller with optional opponent inference.

        Args:
            arglist: options (model dirs, agent_num, timestep, action_space, inference).
            action_space: dimensionality of this agent's action output.
            scope: suffix for the opponent model's variable scope name.
            load_model: if True, restore pretrained VAE/RNN weights from JSON.
        """
        self.action_space = action_space
        self.arglist = arglist
        self.vae = ConvVAE(batch_size=1,
                           gpu_mode=False,
                           is_training=False,
                           reuse=True)

        # RNN input: z (32) + own action + flattened opponent action histories.
        hps_sample = hps_model._replace(
            batch_size=1,
            input_seq_width=32 + arglist.action_space +
            (arglist.agent_num - 1) * arglist.action_space * arglist.timestep,
            max_seq_len=1,
            use_recurrent_dropout=0,
            is_training=0)

        self.rnn = MDNRNN(hps_sample, gpu_mode=False, reuse=True)

        if load_model:
            self.vae.load_json(arglist.vae_model_dir)
            self.rnn.load_json(arglist.rnn_model_dir)

        self.state = rnn_init_state(self.rnn)
        self.rnn_mode = True
        if arglist.inference:
            self.input_size = rnn_output_size(
                EXP_MODE) + (arglist.agent_num - 1) * arglist.action_space
        else:
            self.input_size = rnn_output_size(EXP_MODE)
        self.z_size = 32

        # action trajectories recording
        # BUGFIX: the original used `[deque(...)] * (agent_num - 1)`, which
        # replicates references to ONE shared deque, so every opponent wrote
        # into the same history buffer.  Build an independent deque (and an
        # independent zero array) per opponent instead.
        self.act_traj = [
            collections.deque(np.zeros(
                (arglist.timestep, arglist.action_space)),
                              maxlen=arglist.timestep)
            for _ in range(arglist.agent_num - 1)
        ]
        self.oppo_model = Oppo_Model(arglist.agent_num, arglist.timestep,
                                     arglist.action_space,
                                     arglist.action_space,
                                     "oppo_model_{}".format(scope))
        self.inference = arglist.inference

        if EXP_MODE == MODE_Z_HIDDEN:  # one hidden layer
            self.hidden_size = 40
            # Random init; presumably replaced by a loaded parameter vector.
            self.weight_hidden = np.random.randn(self.input_size,
                                                 self.hidden_size)
            self.bias_hidden = np.random.randn(self.hidden_size)
            self.weight_output = np.random.randn(self.hidden_size,
                                                 self.action_space)
            self.bias_output = np.random.randn(self.action_space)
            # (input + 1 bias) * hidden, plus hidden->action weights + biases.
            self.param_count = ((self.input_size + 1) * self.hidden_size) + (
                self.hidden_size * self.action_space + self.action_space)
        else:
            # Single linear layer from controller input to actions.
            self.weight = np.random.randn(self.input_size, self.action_space)
            self.bias = np.random.randn(self.action_space)
            self.param_count = (
                self.input_size) * self.action_space + self.action_space
Пример #9
0
class ModelMCTS(Model):
    """Model variant that picks actions via Monte-Carlo tree search."""

    def __init__(self, load_model=True):
        """Build VAE/RNN components plus a real env used by the MCTS rollouts.

        Args:
            load_model: if True, restore pretrained VAE/RNN weights from JSON.
        """
        self.env_name = "carracing"
        # Real environment instance that MCTS simulates against.
        self.env = make_env(self.env_name,
                            seed=SEED,
                            render_mode=render_mode,
                            full_episode=False)
        self.vae = ConvVAE(batch_size=1,
                           gpu_mode=False,
                           is_training=False,
                           reuse=True)

        self.rnn = MDNRNN(hps_sample, gpu_mode=False, reuse=True)

        if load_model:
            self.vae.load_json('../vae/vae.json')
            self.rnn.load_json('../rnn/rnn.json')

        # Recurrent state of the MDN-RNN, advanced as the episode unrolls.
        self.state = rnn_init_state(self.rnn)
        self.rnn_mode = True

        self.input_size = rnn_output_size(EXP_MODE)
        self.z_size = 32

        if EXP_MODE == MODE_Z_HIDDEN:  # one hidden layer
            self.hidden_size = 40
            # Random init; unused by get_action below, which relies on MCTS.
            self.weight_hidden = np.random.randn(self.input_size,
                                                 self.hidden_size)
            self.bias_hidden = np.random.randn(self.hidden_size)
            self.weight_output = np.random.randn(self.hidden_size, 3)
            self.bias_output = np.random.randn(3)
            self.param_count = ((self.input_size + 1) *
                                self.hidden_size) + (self.hidden_size * 3 + 3)
        else:
            self.weight = np.random.randn(self.input_size, 3)
            self.bias = np.random.randn(3)
            self.param_count = (self.input_size) * 3 + 3

        self.render_mode = False
        # Persisted MCTS tree, reused across successive get_action calls.
        self.mct = None

    def get_action(self, z):
        """Choose an action for latent state z by running MCTS on self.env."""
        # Candidate values per control dimension (steer, gas, brake ranges).
        a = random_linear_sample(-1, 1)
        b = random_linear_sample(0, 1)
        c = random_linear_sample(0, 1)
        actions = dp(a, b, c)
        # Search from z, reusing the previous tree to warm-start the search.
        action, self.mct = mcts.mcts(z,
                                     self.env,
                                     actions,
                                     old_tree=self.mct,
                                     tree_depth=6,
                                     simulate_depth=200)

        # Advance the RNN state with the chosen action.
        self.state = rnn_next_state(self.rnn, z, action, self.state)

        return action
Пример #10
0
def train_rnn(args, train_dataset, validation_dataset):
    """Compile and fit the MDN-RNN, logging to TensorBoard and checkpointing.

    Args:
        args: experiment config (rnn_epoch_steps, rnn_num_steps, paths).
        train_dataset: training tf.data pipeline.
        validation_dataset: validation tf.data pipeline.
    """
    save_dir = get_path(args, "tf_rnn", create=True)

    rnn = MDNRNN(args=args)
    rnn.compile(optimizer=rnn.optimizer,
                loss=rnn.loss_fn,
                metrics=rnn.get_metrics())

    print("Start training")

    # Timestamped run directory so successive runs don't clobber each other.
    stamp = datetime.now().strftime("%Y%m%d-%H%M%S")
    tb_dir = save_dir / "tensorboard" / stamp

    callbacks = [
        tf.keras.callbacks.TensorBoard(log_dir=str(tb_dir),
                                       update_freq=20,
                                       histogram_freq=1,
                                       profile_batch=0),
        tf.keras.callbacks.ModelCheckpoint(
            str(save_dir / "ckpt-e{epoch:03d}"), verbose=1),
    ]
    rnn.fit(train_dataset,
            validation_data=validation_dataset,
            steps_per_epoch=args.rnn_epoch_steps,
            epochs=args.rnn_num_steps // args.rnn_epoch_steps,
            callbacks=callbacks)

    rnn.save(str(save_dir))
    print(f"Model saved to {save_dir}")
Пример #11
0
  def __init__(self, load_model=True, full_episode=False):
    """CarRacing wrapper producing z + hidden-state observations.

    Args:
        load_model: if True, restore pretrained VAE/RNN weights from JSON.
        full_episode: forwarded to the base wrapper.
    """
    super(CarRacingMDNRNN, self).__init__(full_episode=full_episode)
    self.vae = CVAE(batch_size=1)
    self.rnn = MDNRNN(hps_sample)

    if load_model:
      self.vae.load_json('tf_vae/vae.json')
      self.rnn.load_json('tf_rnn/rnn.json')

    self.rnn_states = rnn_init_state(self.rnn)

    self.full_episode = False
    # BUGFIX: `shape` must be a 1-tuple — the original passed the bare int
    # (32+256).  np.NINF/np.Inf were removed in NumPy 2.0; use -np.inf/np.inf.
    self.observation_space = Box(low=-np.inf, high=np.inf, shape=(32 + 256,))
Пример #12
0
class CarRacingMDNRNN(CarRacingWrapper):
  """CarRacing wrapper whose observations are VAE latents + MDN-RNN state."""

  def __init__(self, args, load_model=True, full_episode=False, with_obs=False):
    """Build the VAE/RNN and define action/observation spaces.

    Args:
        args: experiment config (exp_name, env_name, z_size, rnn_size, state_space).
        load_model: if True, restore pretrained SavedModel weights.
        full_episode: forwarded to the base wrapper.
        with_obs: whether or not to return the frame with the encodings.
    """
    super(CarRacingMDNRNN, self).__init__(full_episode=full_episode)
    self.with_obs = with_obs # whether or not to return the frame with the encodings
    self.vae = CVAE(args)
    self.rnn = MDNRNN(args)

    if load_model:
      self.vae.set_weights([param_i.numpy() for param_i in tf.saved_model.load('results/{}/{}/tf_vae'.format(args.exp_name, args.env_name)).variables])
      self.rnn.set_weights([param_i.numpy() for param_i in tf.saved_model.load('results/{}/{}/tf_rnn'.format(args.exp_name, args.env_name)).variables])
    self.rnn_states = rnn_init_state(self.rnn)

    self.full_episode = False
    # BUGFIX: `shape` must be a tuple — the original passed a bare int
    # (missing trailing comma).  np.NINF/np.Inf were removed in NumPy 2.0.
    self.observation_space = Box(
        low=-np.inf, high=np.inf,
        shape=(args.z_size + args.rnn_size * args.state_space,))

  def encode_obs(self, obs):
    # convert raw obs to z, mu, logvar
    # BUGFIX: np.float (a plain alias of builtin float) was removed in
    # NumPy 1.24; builtin float preserves the original float64 dtype.
    result = np.copy(obs).astype(float)/255.0
    result = result.reshape(1, 64, 64, 3)
    z = self.vae.encode(result)[0]
    return z

  def reset(self):
    """Reset RNN state and the wrapped env; optionally return the raw frame."""
    self.rnn_states = rnn_init_state(self.rnn)
    if self.with_obs:
        [z_state, obs] = super(CarRacingMDNRNN, self).reset() # calls step
        self.N_tiles = len(self.track)
        return [z_state, obs]
    else:
        z_state = super(CarRacingMDNRNN, self).reset() # calls step
        self.N_tiles = len(self.track)
        return z_state

  def _step(self, action):
    """Step the env; observation is concat([z, h]) or concat([z, c, h])."""
    obs, reward, done, _ = super(CarRacingMDNRNN, self)._step(action)
    z = tf.squeeze(self.encode_obs(obs))
    h = tf.squeeze(self.rnn_states[0])
    c = tf.squeeze(self.rnn_states[1])
    # state_space == 2 means both LSTM cell and hidden state are exposed.
    if self.rnn.args.state_space == 2:
        z_state = tf.concat([z, c, h], axis=-1)
    else:
        z_state = tf.concat([z, h], axis=-1)
    if action is not None: # don't compute state on reset
        self.rnn_states = rnn_next_state(self.rnn, z, action, self.rnn_states)
    if self.with_obs:
        return [z_state, obs], reward, done, {}
    else:
        return z_state, reward, done, {}

  def close(self):
    """Close the wrapped env and release TF graph memory."""
    super(CarRacingMDNRNN, self).close()
    tf.keras.backend.clear_session()
    gc.collect()
Пример #13
0
    def __init__(self,
                 args,
                 render_mode=False,
                 load_model=True,
                 with_obs=False):
        """DoomTakeCover wrapper operating in VAE latent + RNN state space.

        Args:
            args: experiment config providing `exp_name` and `env_name`.
            render_mode: if True, rendering is enabled (no_render=False).
            load_model: if True, restore pretrained SavedModel weights.
            with_obs: whether to also expose the raw frame alongside encodings.
        """
        super(DoomTakeCoverMDNRNN, self).__init__()

        self.with_obs = with_obs

        self.no_render = True
        if render_mode:
            self.no_render = False
        self.current_obs = None

        self.vae = CVAE(args)
        self.rnn = MDNRNN(args)

        if load_model:
            # Restore pretrained weights from the SavedModel variable lists.
            self.vae.set_weights([
                param_i.numpy() for param_i in
                tf.saved_model.load('results/{}/{}/tf_vae'.format(
                    args.exp_name, args.env_name)).variables
            ])
            self.rnn.set_weights([
                param_i.numpy() for param_i in
                tf.saved_model.load('results/{}/{}/tf_rnn'.format(
                    args.exp_name, args.env_name)).variables
            ])

        self.action_space = Box(low=-1.0, high=1.0, shape=())
        # Latent observation: z plus RNN state (h, and c if state_space == 2).
        self.obs_size = self.rnn.args.z_size + self.rnn.args.rnn_size * self.rnn.args.state_space

        self.observation_space = Box(low=0, high=255, shape=(64, 64, 3))
        # BUGFIX: `shape` must be a tuple — the original passed the bare int
        # `(self.obs_size)` (missing trailing comma).
        self.actual_observation_space = Box(low=-50.,
                                            high=50.,
                                            shape=(self.obs_size,))

        self._seed()

        self.rnn_states = None
        self.z = None
        self.restart = None
        self.frame_count = None
        self.viewer = None
        self._reset()
Пример #14
0
    def __init__(self,
                 model_name='',
                 load_model=True,
                 load_full_model=False,
                 full_episode=''):
        """Carracing controller with per-model-name weight loading.

        Args:
            model_name: prefix used to locate '<name>_vae.json' / '<name>_rnn.json'.
            load_model: load per-model-name weights from vae_path/rnn_path.
            load_full_model: load 'vae.json'/'rnn.json' from full_model_path
                instead (takes precedence over load_model).
            full_model_path: directory holding the full model's JSON weights.
        """
        self.model_name = model_name
        self.env_name = "carracing"
        self.vae = ConvVAE(batch_size=1,
                           gpu_mode=False,
                           is_training=False,
                           reuse=True)

        self.rnn = MDNRNN(hps_sample, gpu_mode=False, reuse=True)

        if load_full_model:
            self.vae.load_json(os.path.join(full_model_path, 'vae.json'))
            self.rnn.load_json(os.path.join(full_model_path, 'rnn.json'))
        elif load_model:
            # vae_path / rnn_path are module-level directories — confirm.
            self.vae.load_json(
                os.path.join(vae_path, self.model_name + '_vae.json'))
            self.rnn.load_json(
                os.path.join(rnn_path, self.model_name + '_rnn.json'))

        # Recurrent state of the MDN-RNN, advanced as the episode unrolls.
        self.state = rnn_init_state(self.rnn)
        self.rnn_mode = True

        self.input_size = rnn_output_size(EXP_MODE)
        self.z_size = 32

        if EXP_MODE == MODE_Z_HIDDEN:  # one hidden layer
            self.hidden_size = 40
            # Random init; presumably replaced by a loaded parameter vector.
            self.weight_hidden = np.random.randn(self.input_size,
                                                 self.hidden_size)
            self.bias_hidden = np.random.randn(self.hidden_size)
            self.weight_output = np.random.randn(self.hidden_size, 3)
            self.bias_output = np.random.randn(3)
            self.param_count = ((self.input_size + 1) *
                                self.hidden_size) + (self.hidden_size * 3 + 3)
        else:
            # Single linear layer with 3 action outputs.
            self.weight = np.random.randn(self.input_size, 3)
            self.bias = np.random.randn(3)
            self.param_count = (self.input_size) * 3 + 3

        self.render_mode = False
Пример #15
0
 def __init__(self, type="CarRacing", history_pick=4, seed=None, detect_edges=False, detect_grass=False, flip=False):
     """Frame-history CarRacing wrapper with VAE/RNN encoders attached.

     NOTE(review): `type` shadows the builtin; renaming it would break
     keyword callers, so it is kept.  `seed` is merely stored — the env is
     seeded from random.seed(30) + randint below; confirm that is intended.
     """
     self.name = type + str(time.time())
     random.seed(30)
     self.env = make_env('CarRacing-v0', random.randint(1,10000000), render_mode = False, full_episode = True)
     self.image_dimension = [64,64]
     self.history_pick = history_pick
     # Flattened state: history_pick stacked 64x64 frames.
     self.state_space_size = history_pick * np.prod(self.image_dimension)
     self.action_space_size = 5
     self.state_shape = [None, self.history_pick] + list(self.image_dimension)
     self.history = []
     # Discrete index -> [steer, gas, brake] action mapping.
     self.action_dict = {0: [-1, 0, 0], 1: [1, 0, 0], 2: [0, 1, 0], 3: [0, 0, 0.8], 4: [0, 0, 0]}
     self.seed = seed
     self.detect_edges = detect_edges
     self.detect_grass = detect_grass
     self.flip = flip
     self.flip_episode = False
     self.vae = ConvVAE(batch_size=1, gpu_mode=False, is_training=False, reuse=True)
     self.rnn = MDNRNN(hps_sample, gpu_mode=False, reuse=True)
     self.vae.load_json('vae/vae.json')
     self.rnn.load_json('rnn/rnn.json')
    def __init__(self,
                 sess=None,
                 summary_writer=None,
                 rl_training=False,
                 reuse=False,
                 cluster=None,
                 index=0,
                 device='/gpu:0',
                 ppo_load_path=None,
                 ppo_save_path=None,
                 load_worldmodel=True,
                 ntype='worldmodel'):
        """PPO policy built on top of pretrained world-model components.

        Args:
            sess: TF session used by the policy graph.
            summary_writer: TensorBoard writer; a FileWriter("logs/") is
                created per instance when None.
            rl_training: whether RL training ops are enabled.
            reuse: reuse TF variables of an existing policy.
            cluster / index / device: distributed placement settings.
            ppo_load_path / ppo_save_path: checkpoint path prefixes.
            load_worldmodel: if True, restore VAE/RNN weights from JSON.
            ntype: suffix appended to the checkpoint path prefixes.
        """
        # BUGFIX: the original default was `summary_writer=tf.summary.FileWriter("logs/")`,
        # which is evaluated ONCE at import time — creating "logs/" as a side
        # effect and sharing a single writer across all instances.  Create it
        # lazily here instead.
        if summary_writer is None:
            summary_writer = tf.summary.FileWriter("logs/")

        self.policy_model_path_load = ppo_load_path + ntype
        self.policy_model_path_save = ppo_save_path + ntype

        self.rl_training = rl_training

        self.use_norm = True

        self.reuse = reuse
        self.sess = sess
        self.cluster = cluster
        self.index = index
        self.device = device

        self.vae = ConvVAE(batch_size=1,
                           gpu_mode=False,
                           is_training=False,
                           reuse=True)
        self.rnn = MDNRNN(hps_sample, gpu_mode=False, reuse=True)

        if load_worldmodel:
            self.vae.load_json('vae/vae.json')
            self.rnn.load_json('rnn/rnn.json')

        self.input_size = rnn_output_size(EXP_MODE)

        self._create_graph()

        self.rl_saver = tf.train.Saver()
        self.summary_writer = summary_writer
Пример #17
0
class CarRacingMDNRNN(CarRacingWrapper):
  """CarRacing wrapper producing concat([z, h]) observations from a VAE + MDN-RNN."""

  def __init__(self, load_model=True, full_episode=False):
    """Build VAE/RNN components and define the latent observation space."""
    super(CarRacingMDNRNN, self).__init__(full_episode=full_episode)
    self.vae = CVAE(batch_size=1)
    self.rnn = MDNRNN(hps_sample)

    if load_model:
      self.vae.load_json('tf_vae/vae.json')
      self.rnn.load_json('tf_rnn/rnn.json')

    self.rnn_states = rnn_init_state(self.rnn)

    self.full_episode = False
    # BUGFIX: `shape` must be a 1-tuple — the original passed the bare int
    # (32+256).  np.NINF/np.Inf were removed in NumPy 2.0.
    self.observation_space = Box(low=-np.inf, high=np.inf, shape=(32 + 256,))

  def encode_obs(self, obs):
    # convert raw obs to z, mu, logvar
    # BUGFIX: np.float (alias of builtin float) was removed in NumPy 1.24;
    # builtin float preserves the original float64 dtype.
    result = np.copy(obs).astype(float)/255.0
    result = result.reshape(1, 64, 64, 3)
    mu, logvar = self.vae.encode_mu_logvar(result)
    mu = mu[0]
    logvar = logvar[0]
    s = logvar.shape
    # Reparameterization: z = mu + sigma * eps.
    z = mu + np.exp(logvar/2.0) * np.random.randn(*s)
    return z, mu, logvar

  def reset(self):
    """Reset the RNN state, then the environment."""
    self.rnn_states = rnn_init_state(self.rnn)
    # NOTE(review): this deliberately(?) skips CarRacingWrapper and calls the
    # grandparent's reset — confirm it should not be super(CarRacingMDNRNN, self).
    z_h = super(CarRacingWrapper, self).reset() # calls step
    return z_h

  def _step(self, action):
    """Step the env; return concat([z, h]) as the observation."""
    obs, reward, done, _ = super(CarRacingMDNRNN, self)._step(action)
    z, _, _ = self.encode_obs(obs)
    h = tf.squeeze(self.rnn_states[0])
    z_h = tf.concat([z, h], axis=-1)

    if action is not None: # don't compute state on reset
        self.rnn_states = rnn_next_state(self.rnn, z, action, self.rnn_states)
    return z_h, reward, done, {}
Пример #18
0
    def __init__(self, load_model=True):
        """Pong controller: ConvVAE + MDN-RNN sized for the env's action count.

        Args:
            load_model: if True, restore pretrained VAE/RNN weights from JSON.
        """
        self.env_name = "Pong"
        # Creates the env; presumably also sets self.num_actions — confirm.
        self._make_env()

        self.vae = ConvVAE(batch_size=1,
                           gpu_mode=False,
                           is_training=False,
                           reuse=True)

        # Size the RNN's action input to this env's action count.
        hps_sample_dynamic = hps_sample._replace(num_actions=self.num_actions)
        self.rnn = MDNRNN(hps_sample_dynamic, gpu_mode=False, reuse=True)

        if load_model:
            self.vae.load_json('vae/vae.json')
            self.rnn.load_json('rnn/rnn.json')

        # Recurrent state of the MDN-RNN, advanced as the episode unrolls.
        self.state = rnn_init_state(self.rnn)
        self.rnn_mode = True

        self.input_size = rnn_output_size(EXP_MODE)
        self.z_size = 32

        if EXP_MODE == MODE_Z_HIDDEN:  # one hidden layer
            raise Exception("not ported for atari")
            # NOTE(review): everything below the raise is unreachable dead code.
            self.hidden_size = 40
            self.weight_hidden = np.random.randn(self.input_size,
                                                 self.hidden_size)
            self.bias_hidden = np.random.randn(self.hidden_size)
            self.weight_output = np.random.randn(self.hidden_size,
                                                 self.num_actions)
            self.bias_output = np.random.randn(self.num_actions)
            self.param_count = ((self.input_size + 1) * self.hidden_size) + (
                (self.hidden_size + 1) * self.num_actions)
        else:
            # TODO: Not known until env.action_space is queried...
            self.weight = np.random.randn(self.input_size, self.num_actions)
            self.bias = np.random.randn(self.num_actions)
            self.param_count = (self.input_size + 1) * self.num_actions

        self.render_mode = False
Пример #19
0
    def __init__(self, load_model=True, env_name="Pong-v0", render_mode=False):
        """Atari controller: ConvVAE + MDN-RNN with env-sized action input.

        Args:
            load_model: if True, restore pretrained VAE/RNN weights from JSON.
            env_name: gym environment id to wrap.
            render_mode: accepted but not stored here; render_mode is reset to
                False below.
        """
        self.env_name = env_name
        # Creates the env; presumably also sets self.na (action dim) — confirm.
        self.make_env()
        self.z_size = 32

        self.vae = ConvVAE(batch_size=1,
                           gpu_mode=False,
                           is_training=False,
                           reuse=True)
        # RNN input width: latent z plus the env's action dimension.
        hps_atari = hps_sample._replace(input_seq_width=self.z_size + self.na)
        self.rnn = MDNRNN(hps_atari, gpu_mode=False, reuse=True)

        if load_model:
            self.vae.load_json('vae/vae.json')
            self.rnn.load_json('rnn/rnn.json')

        # Recurrent state of the MDN-RNN, advanced as the episode unrolls.
        self.state = rnn_init_state(self.rnn)
        self.rnn_mode = True

        self.input_size = rnn_output_size(EXP_MODE)
        self.init_controller()

        self.render_mode = False
Пример #20
0
class Model:
  ''' simple one layer model for car racing '''
  def __init__(self):
    """Build the ConvVAE + MDN-RNN and a randomly-initialized linear policy."""
    self.env_name = "carracing"
    self.vae = ConvVAE(batch_size=1, gpu_mode=False, is_training=False, reuse=True)
    self.vae.load_json('vae/vae.json')
    self.rnn = MDNRNN(hps_sample, gpu_mode=False, reuse=True)
    self.rnn.load_json('rnn/rnn.json')
    # Recurrent state of the MDN-RNN, advanced as the episode unrolls.
    self.state = rnn_init_state(self.rnn)
    self.rnn_mode = True

    self.input_size = rnn_output_size(EXP_MODE)
    self.z_size = 32

    if EXP_MODE == MODE_Z_HIDDEN: # one hidden layer
      self.hidden_size = 40
      # Random init; real parameters arrive via set_model_params/load_model.
      self.weight_hidden = np.random.randn(self.input_size, self.hidden_size)
      self.bias_hidden = np.random.randn(self.hidden_size)
      self.weight_output = np.random.randn(self.hidden_size, 3)
      self.bias_output = np.random.randn(3)
      self.param_count = ((self.input_size+1)*self.hidden_size) + (self.hidden_size*3+3)
    else:
      self.weight = np.random.randn(self.input_size, 3)
      self.bias = np.random.randn(3)
      self.param_count = (self.input_size)*3+3

    self.render_mode = False

  def make_env(self, seed=-1, render_mode=False):
    """Create the gym environment for this model."""
    self.render_mode = render_mode
    self.env = make_env(self.env_name, seed=seed, render_mode=render_mode)

  def reset(self):
    """Reset the RNN hidden state for a new episode."""
    self.state = rnn_init_state(self.rnn)

  def encode_obs(self, obs):
    # convert raw obs to z, mu, logvar
    # BUGFIX: np.float (alias of builtin float) was removed in NumPy 1.24;
    # builtin float preserves the original float64 dtype.
    result = np.copy(obs).astype(float)/255.0
    result = result.reshape(1, 64, 64, 3)
    mu, logvar = self.vae.encode_mu_logvar(result)
    mu = mu[0]
    logvar = logvar[0]
    s = logvar.shape
    # Reparameterization: z = mu + sigma * eps.
    z = mu + np.exp(logvar/2.0) * np.random.randn(*s)
    return z, mu, logvar

  def decode_obs(self, z):
    # decode the latent vector
    img = self.vae.decode(z.reshape(1, self.z_size)) * 255.
    img = np.round(img).astype(np.uint8)
    img = img.reshape(64, 64, 3)
    return img

  def get_action(self, z):
    """Map latent z (+ RNN output) to a 3-dim action and advance the RNN."""
    h = rnn_output(self.state, z, EXP_MODE)

    '''
    action = np.dot(h, self.weight) + self.bias
    action[0] = np.tanh(action[0])
    action[1] = sigmoid(action[1])
    action[2] = clip(np.tanh(action[2]))
    '''
    if EXP_MODE == MODE_Z_HIDDEN: # one hidden layer
      h = np.tanh(np.dot(h, self.weight_hidden) + self.bias_hidden)
      action = np.tanh(np.dot(h, self.weight_output) + self.bias_output)
    else:
      action = np.tanh(np.dot(h, self.weight) + self.bias)

    # Squash gas into [0, 1]; clip brake.
    action[1] = (action[1]+1.0) / 2.0
    action[2] = clip(action[2])

    self.state = rnn_next_state(self.rnn, z, action, self.state)

    return action

  def set_model_params(self, model_params):
    """Install a flat parameter vector into the controller's weights."""
    if EXP_MODE == MODE_Z_HIDDEN: # one hidden layer
      params = np.array(model_params)
      # Layout: [hidden biases | hidden weights | output biases | output weights].
      cut_off = (self.input_size+1)*self.hidden_size
      params_1 = params[:cut_off]
      params_2 = params[cut_off:]
      self.bias_hidden = params_1[:self.hidden_size]
      self.weight_hidden = params_1[self.hidden_size:].reshape(self.input_size, self.hidden_size)
      self.bias_output = params_2[:3]
      self.weight_output = params_2[3:].reshape(self.hidden_size, 3)
    else:
      self.bias = np.array(model_params[:3])
      self.weight = np.array(model_params[3:]).reshape(self.input_size, 3)

  def load_model(self, filename):
    """Load controller parameters (first element of the JSON payload)."""
    with open(filename) as f:
      data = json.load(f)
    print('loading file %s' % (filename))
    self.data = data
    model_params = np.array(data[0]) # assuming other stuff is in data
    self.set_model_params(model_params)

  def get_random_model_params(self, stdev=0.1):
    """Sample a random flat parameter vector of the right length."""
    return np.random.randn(self.param_count)*stdev
# Dataset of per-episode latent stats produced by the VAE encoding step.
N_data = len(data_mu)  # should be 10k
batch_size = hps_model.batch_size

# save 1000 initial mu and logvars:
# Scaled by 1e4 and stored as ints to keep the JSON compact.
# BUGFIX: np.int (a plain alias of builtin int) was removed in NumPy 1.24.
initial_mu = np.copy(data_mu[:1000, 0, :] * 10000).astype(int).tolist()
initial_logvar = np.copy(data_logvar[:1000, 0, :] * 10000).astype(int).tolist()
with open(os.path.join("tf_initial_z", "initial_z.json"), 'wt') as outfile:
    json.dump([initial_mu, initial_logvar],
              outfile,
              sort_keys=True,
              indent=0,
              separators=(',', ': '))

reset_graph()
rnn = MDNRNN(hps_model)

# train loop:
hps = hps_model
start = time.time()
for local_step in range(hps.num_steps):

    step = rnn.sess.run(rnn.global_step)
    curr_learning_rate = (hps.learning_rate - hps.min_learning_rate) * (
        hps.decay_rate)**step + hps.min_learning_rate

    raw_z, raw_a = random_batch()
    inputs = np.concatenate((raw_z[:, :-1, :], raw_a[:, :-1, :]), axis=2)
    outputs = raw_z[:, 1:, :]  # teacher forcing (shift by one predictions)

    feed = {
Пример #22
0
from baselines.ddpg.models import Actor, Critic
from baselines.ddpg.memory import Memory
from baselines.ddpg.noise import AdaptiveParamNoiseSpec, NormalActionNoise, OrnsteinUhlenbeckActionNoise
from baselines.common import set_global_seeds
import baselines.common.tf_util as U

from baselines import logger
import numpy as np

try:
    from mpi4py import MPI
except ImportError:
    MPI = None

# Build inference-mode (batch size 1, CPU) world-model components and load
# their pretrained weights from the local json checkpoints.
vae = ConvVAE(batch_size=1, gpu_mode=False, is_training=False, reuse=True)
rnn = MDNRNN(hps_sample, gpu_mode=False, reuse=True)
vae.load_json('vae/vae.json')
rnn.load_json('rnn/rnn.json')


def learn(network, env,
          seed=None,
          total_timesteps=None,
          nb_epochs=None, # with default settings, perform 1M steps total
          nb_epoch_cycles=20,
          nb_rollout_steps=100,
          reward_scale=1.0,
          render=False,
          render_eval=False,
          noise_type='adaptive-param_0.2',
          normalize_returns=False,
Пример #23
0
class Model:
    ''' simple one layer model for translating game state to actions'''

    def __init__(self, load_model=True):
        """Build the VAE + MDN-RNN world model and a linear controller.

        load_model: when True, load pretrained VAE/RNN weights from the
        local ``vae/`` and ``rnn/`` json checkpoints.
        """
        self.env_name = "Pong"
        self._make_env()

        self.vae = ConvVAE(batch_size=1,
                           gpu_mode=False,
                           is_training=False,
                           reuse=True)

        # The RNN input width depends on the env's action count, so patch
        # the sampled hyper-parameters with the real number of actions.
        hps_sample_dynamic = hps_sample._replace(num_actions=self.num_actions)
        self.rnn = MDNRNN(hps_sample_dynamic, gpu_mode=False, reuse=True)

        if load_model:
            self.vae.load_json('vae/vae.json')
            self.rnn.load_json('rnn/rnn.json')

        self.state = rnn_init_state(self.rnn)
        self.rnn_mode = True

        self.input_size = rnn_output_size(EXP_MODE)
        self.z_size = 32

        if EXP_MODE == MODE_Z_HIDDEN:  # one hidden layer
            raise Exception("not ported for atari")
            # NOTE: everything below is unreachable until the hidden-layer
            # mode is ported; kept as a reference implementation.
            self.hidden_size = 40
            self.weight_hidden = np.random.randn(self.input_size,
                                                 self.hidden_size)
            self.bias_hidden = np.random.randn(self.hidden_size)
            self.weight_output = np.random.randn(self.hidden_size,
                                                 self.num_actions)
            self.bias_output = np.random.randn(self.num_actions)
            self.param_count = ((self.input_size + 1) * self.hidden_size) + (
                (self.hidden_size + 1) * self.num_actions)
        else:
            # TODO: Not known until env.action_space is queried...
            self.weight = np.random.randn(self.input_size, self.num_actions)
            self.bias = np.random.randn(self.num_actions)
            self.param_count = (self.input_size + 1) * self.num_actions

        self.render_mode = False

    def _make_env(self, render_mode=False):
        """Create the wrapped env and cache its discrete action count.

        BUGFIX: ``render_mode`` used to be read as an undefined global,
        raising NameError whenever this method ran; it is now a keyword
        argument defaulting to False (``__init__`` calls it with no args,
        so existing callers are unaffected).
        """
        self.render_mode = render_mode
        self.env = make_env(self.env_name)
        self.num_actions = self.env.action_space.n

    def make_env(self):
        pass  #TODO (Chazzz): eventually remove

    def reset(self):
        """Reset the RNN hidden state for a new episode."""
        self.state = rnn_init_state(self.rnn)

    def encode_obs(self, obs):
        # convert raw obs to z, mu, logvar
        # BUGFIX: np.float was removed in NumPy 1.24; the builtin float is
        # the exact alias it used to be, so behavior is unchanged.
        result = np.copy(obs).astype(float) / 255.0
        result = result.reshape(1, 64, 64, 1)
        mu, logvar = self.vae.encode_mu_logvar(result)
        mu = mu[0]
        logvar = logvar[0]
        s = logvar.shape
        # Reparameterization trick: sample z ~ N(mu, exp(logvar)).
        z = mu + np.exp(logvar / 2.0) * np.random.randn(*s)
        return z, mu, logvar

    def get_action(self, z):
        """Pick a greedy discrete action and advance the RNN state."""
        h = rnn_output(self.state, z, EXP_MODE)
        # print(len(h), " h:", h) #TODO: 256+32 (the 32 comes first)
        # So we could have 288*2*18 params, or 288*2*environment.action_space.n (6 for Pong)
        '''
    action = np.dot(h, self.weight) + self.bias
    action[0] = np.tanh(action[0])
    action[1] = sigmoid(action[1])
    action[2] = clip(np.tanh(action[2]))
    '''
        if EXP_MODE == MODE_Z_HIDDEN:  # one hidden layer
            raise Exception("Not ported to atari")
            # h = np.tanh(np.dot(h, self.weight_hidden) + self.bias_hidden)
            # action = np.tanh(np.dot(h, self.weight_output) + self.bias_output)
        else:
            # could probabilistically sample from softmax, but greedy
            action = np.argmax(np.matmul(h, self.weight) + self.bias)

        # action[1] = (action[1]+1.0) / 2.0
        # action[2] = clip(action[2])
        # print("Action:", action)
        # One-hot encode the chosen action for the RNN's next-state input.
        action_one_hot = np.zeros(self.num_actions)
        action_one_hot[action] = 1
        # print("Action hot:", action_one_hot)

        self.state = rnn_next_state(self.rnn, z, action_one_hot, self.state)

        return action

    def set_model_params(self, model_params):
        """Unpack a flat parameter vector into the controller weights."""
        if EXP_MODE == MODE_Z_HIDDEN:  # one hidden layer
            params = np.array(model_params)
            cut_off = (self.input_size + 1) * self.hidden_size
            params_1 = params[:cut_off]
            params_2 = params[cut_off:]
            self.bias_hidden = params_1[:self.hidden_size]
            self.weight_hidden = params_1[self.hidden_size:].reshape(
                self.input_size, self.hidden_size)
            self.bias_output = params_2[:self.num_actions]
            self.weight_output = params_2[self.num_actions:].reshape(
                self.hidden_size, self.num_actions)
        else:
            self.bias = np.array(model_params[:self.num_actions])
            self.weight = np.array(model_params[self.num_actions:]).reshape(
                self.input_size, self.num_actions)

    def load_model(self, filename):
        """Load controller parameters from a JSON file and apply them."""
        with open(filename) as f:
            data = json.load(f)
        print('loading file %s' % (filename))
        self.data = data
        model_params = np.array(data[0])  # assuming other stuff is in data
        self.set_model_params(model_params)

    def get_random_model_params(self, stdev=0.1):
        """Heavy-tailed random parameter init (Cauchy, scaled by stdev)."""
        #return np.random.randn(self.param_count)*stdev
        return np.random.standard_cauchy(
            self.param_count) * stdev  # spice things up

    def init_random_model_params(self, stdev=0.1):
        """Randomize controller, VAE, and RNN parameters in one shot."""
        params = self.get_random_model_params(stdev=stdev)
        self.set_model_params(params)
        vae_params = self.vae.get_random_model_params(stdev=stdev)
        self.vae.set_model_params(vae_params)
        rnn_params = self.rnn.get_random_model_params(stdev=stdev)
        self.rnn.set_model_params(rnn_params)
Пример #24
0
class DreamDoomTakeCoverMDNRNN:
    """Gym-style 'dream' environment: episodes are rolled out entirely
    inside a pretrained MDN-RNN world model instead of the real Doom
    TakeCover env. Observations are [z, rnn cell, rnn hidden] concatenated.
    """

    def __init__(self, args, render_mode=False, load_model=True):

        self.render_mode = render_mode
        model_path_name = 'results/{}/{}'.format(args.exp_name, args.env_name)
        # initial_z.json stores mu/logvar as fixed-point ints scaled by 1e4
        # (see _sample_init_z, which divides by 10000.0).
        with open(os.path.join(model_path_name, 'tf_initial_z/initial_z.json'),
                  'r') as f:
            [initial_mu, initial_logvar] = json.load(f)

        # Pair mu with logvar per sample; indexing [idx] later yields the
        # (mu, logvar) pair for one start state.
        self.initial_mu_logvar = np.array(
            [list(elem) for elem in zip(initial_mu, initial_logvar)])

        self.vae = CVAE(args)
        self.rnn = MDNRNN(args)

        if load_model:
            # Weights are restored by copying variables out of SavedModels.
            self.vae.set_weights([
                param_i.numpy() for param_i in
                tf.saved_model.load('results/{}/{}/tf_vae'.format(
                    args.exp_name, args.env_name)).variables
            ])
            self.rnn.set_weights([
                param_i.numpy() for param_i in
                tf.saved_model.load('results/{}/{}/tf_rnn'.format(
                    args.exp_name, args.env_name)).variables
            ])

        # future versions of OpenAI gym needs a dtype=np.float32 in the next line:
        self.action_space = Box(low=-1.0, high=1.0, shape=())
        obs_size = self.rnn.args.z_size + self.rnn.args.rnn_size * self.rnn.args.state_space
        # future versions of OpenAI gym needs a dtype=np.float32 in the next line:
        self.observation_space = Box(low=-50., high=50., shape=(obs_size, ))

        self.rnn_states = None
        self.o = None  # current latent observation z

        self._training = True

        self.seed()
        self.reset()

    def _sample_init_z(self):
        """Sample one initial latent z from the stored mu/logvar table."""
        idx = self.np_random.randint(low=0,
                                     high=self.initial_mu_logvar.shape[0])
        init_mu, init_logvar = self.initial_mu_logvar[idx]
        # Undo the 1e4 fixed-point quantization used when the json was saved.
        init_mu = init_mu / 10000.0
        init_logvar = init_logvar / 10000.0
        init_z = init_mu + np.exp(
            init_logvar / 2.0) * self.np_random.randn(*init_logvar.shape)
        return init_z

    def reset(self):
        """Start a new dream episode; returns the initial observation."""
        self.rnn_states = rnn_init_state(self.rnn)
        z = np.expand_dims(self._sample_init_z(), axis=0)
        self.o = z
        # Observation layout: [z, cell state, hidden state].
        z_ch = tf.concat([z, self.rnn_states[1], self.rnn_states[0]], axis=-1)
        return tf.squeeze(z_ch)

    def seed(self, seed=None):
        # NOTE(review): `if seed:` treats seed=0 the same as None — confirm
        # whether 0 should be a valid TF seed here.
        if seed:
            tf.random.set_seed(seed)

        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def step(self, action):
        """Advance the dream one step; returns (obs, reward, done, info)."""
        rnn_states_p1, z_tp1, r_tp1, d_tp1 = rnn_sim(self.rnn,
                                                     self.o,
                                                     self.rnn_states,
                                                     action,
                                                     training=self._training)
        self.rnn_states = rnn_states_p1
        self.o = z_tp1

        z_ch = tf.squeeze(
            tf.concat([z_tp1, self.rnn_states[1], self.rnn_states[0]],
                      axis=-1))
        return z_ch.numpy(), tf.squeeze(r_tp1), d_tp1.numpy(), {}

    def close(self):
        """Drop the TF graph/session state and force a GC pass."""
        tf.keras.backend.clear_session()
        gc.collect()

    def render(self, mode):
        # Nothing to render: the episode exists only in latent space.
        pass
Пример #25
0
class Model:
    ''' simple one layer model for car racing '''
    # World-models controller with a 2-dimensional continuous action;
    # also steps the environment itself inside get_action().

    def __init__(self, arglist):
        """Build VAE + MDN-RNN from arglist checkpoints and init controller."""
        self.env_name = arglist.game
        self.vae = ConvVAE(batch_size=1,
                           gpu_mode=False,
                           is_training=False,
                           reuse=True)
        self.vae.load_json(arglist.vae_file)
        self.rnn = MDNRNN(hps_sample, gpu_mode=False, reuse=True)
        self.rnn.load_json(arglist.rnn_file)
        self.state = rnn_init_state(self.rnn)
        self.rnn_mode = True

        self.input_size = rnn_output_size(EXP_MODE)
        self.z_size = 32

        if EXP_MODE == MODE_Z_HIDDEN:  # one hidden layer
            self.hidden_size = 40
            self.weight_hidden = np.random.randn(self.input_size,
                                                 self.hidden_size)
            self.bias_hidden = np.random.randn(self.hidden_size)
            self.weight_output = np.random.randn(self.hidden_size, 2)
            self.bias_output = np.random.randn(2)
            self.param_count = ((self.input_size + 1) *
                                self.hidden_size) + (self.hidden_size * 2 + 2)
        else:
            # Single linear layer: input_size weights + 2 biases per action.
            self.weight = np.random.randn(self.input_size, 2)
            self.bias = np.random.randn(2)
            self.param_count = (self.input_size) * 2 + 2

        self.render_mode = False

    def make_env(self, seed=-1, render_mode=False):
        """Create the wrapped game environment."""
        self.render_mode = render_mode
        self.env = make_env(self.env_name, seed=seed, render_mode=render_mode)

    def reset(self):
        """Reset the RNN hidden state for a new episode."""
        self.state = rnn_init_state(self.rnn)

    def encode_obs(self, obs):
        # convert raw obs to z, mu, logvar
        # NOTE(review): np.float was removed in NumPy 1.24 — needs builtin
        # float on modern NumPy.
        result = np.copy(obs).astype(np.float) / 255.0
        result = result.reshape(1, 64, 64, 3)
        mu, logvar = self.vae.encode_mu_logvar(result)
        mu = mu[0]
        logvar = logvar[0]
        s = logvar.shape
        # Reparameterization trick: sample z ~ N(mu, exp(logvar)).
        z = mu + np.exp(logvar / 2.0) * np.random.randn(*s)
        return z, mu, logvar

    def decode_obs(self, z):
        # decode the latent vector
        img = self.vae.decode(z.reshape(1, self.z_size)) * 255.
        img = np.round(img).astype(np.uint8)
        img = img.reshape(64, 64, 3)
        return img

    def get_action(self, z, arglist):
        """Compute the action AND step the env (side effect); returns action."""
        h = rnn_output(self.state, z, EXP_MODE)
        '''
    action = np.dot(h, self.weight) + self.bias
    action[0] = np.tanh(action[0])
    action[1] = sigmoid(action[1])
    action[2] = clip(np.tanh(action[2]))
    '''
        if EXP_MODE == MODE_Z_HIDDEN:  # one hidden layer
            h = np.tanh(np.dot(h, self.weight_hidden) + self.bias_hidden)
            action = np.tanh(np.dot(h, self.weight_output) + self.bias_output)
        else:
            action = np.tanh(np.dot(h, self.weight) + self.bias)

        # In competitive mode the opponent is a scripted agent.
        if arglist.competitive:
            obs, rewards, done, win = self.env.step([action[0], 'script'])
        else:
            obs, rewards, done, win = self.env.step(action)

        extra_reward = 0.0  # penalize for turning too frequently
        # NOTE(review): `penalize_turning` is a module-level name not visible
        # here; also in the non-competitive branch `reward` is only assigned
        # when train_mode and penalize_turning are both true (currently
        # harmless since it is never used afterwards).
        if arglist.competitive:
            if arglist.train_mode and penalize_turning:
                extra_reward -= np.abs(action[0]) / 10.0
                rewards[0] += extra_reward
            reward = rewards[0]
        else:
            if arglist.train_mode and penalize_turning:
                reward = np.sum(rewards)
                extra_reward -= np.abs(action[0]) / 10.0
                reward += extra_reward

        # recording_reward.append(reward)
        # total_reward += reward

        self.state = rnn_next_state(self.rnn, z, action, self.state)

        return action

    def set_model_params(self, model_params):
        """Unpack a flat parameter vector into the controller weights."""
        if EXP_MODE == MODE_Z_HIDDEN:  # one hidden layer
            params = np.array(model_params)
            cut_off = (self.input_size + 1) * self.hidden_size
            params_1 = params[:cut_off]
            params_2 = params[cut_off:]
            self.bias_hidden = params_1[:self.hidden_size]
            self.weight_hidden = params_1[self.hidden_size:].reshape(
                self.input_size, self.hidden_size)
            self.bias_output = params_2[:2]
            self.weight_output = params_2[2:].reshape(self.hidden_size, 2)
        else:
            self.bias = np.array(model_params[:2])
            self.weight = np.array(model_params[2:]).reshape(
                self.input_size, 2)

    def load_model(self, filename):
        """Load controller parameters from a JSON file and apply them."""
        with open(filename) as f:
            data = json.load(f)
        print('loading file %s' % (filename))
        self.data = data
        model_params = np.array(data[0])  # assuming other stuff is in data
        self.set_model_params(model_params)

    def get_random_model_params(self, stdev=0.1):
        """Gaussian random parameter vector scaled by stdev."""
        return np.random.randn(self.param_count) * stdev
Пример #26
0
class DoomTakeCoverMDNRNN(DoomTakeCoverEnv):
    """Doom TakeCover wrapper that augments the real env with a VAE latent
    and MDN-RNN hidden state; observations handed to the agent are the
    concatenated [z, rnn state] vector (optionally paired with raw pixels).
    """

    def __init__(self,
                 args,
                 render_mode=False,
                 load_model=True,
                 with_obs=False):
        super(DoomTakeCoverMDNRNN, self).__init__()

        # When True, _step/_reset also return the raw 64x64 frame.
        self.with_obs = with_obs

        self.no_render = True
        if render_mode:
            self.no_render = False
        self.current_obs = None

        self.vae = CVAE(args)
        self.rnn = MDNRNN(args)

        if load_model:
            # Restore weights by copying variables out of SavedModels.
            self.vae.set_weights([
                param_i.numpy() for param_i in
                tf.saved_model.load('results/{}/{}/tf_vae'.format(
                    args.exp_name, args.env_name)).variables
            ])
            self.rnn.set_weights([
                param_i.numpy() for param_i in
                tf.saved_model.load('results/{}/{}/tf_rnn'.format(
                    args.exp_name, args.env_name)).variables
            ])

        self.action_space = Box(low=-1.0, high=1.0, shape=())
        self.obs_size = self.rnn.args.z_size + self.rnn.args.rnn_size * self.rnn.args.state_space

        self.observation_space = Box(low=0, high=255, shape=(64, 64, 3))
        # BUGFIX: shape=(self.obs_size) was a bare int (no comma, so not a
        # tuple); gym Box expects a shape tuple.
        self.actual_observation_space = Box(low=-50.,
                                            high=50.,
                                            shape=(self.obs_size, ))

        self._seed()

        self.rnn_states = None
        self.z = None
        self.restart = None
        self.frame_count = None
        self.viewer = None
        self._reset()

    def close(self):
        """Close the underlying env and release TF graph state."""
        super(DoomTakeCoverMDNRNN, self).close()
        tf.keras.backend.clear_session()
        gc.collect()

    def _step(self, action):
        """Advance RNN state, map the scalar action to Doom buttons, step."""
        # update states of rnn
        self.frame_count += 1

        self.rnn_states = rnn_next_state(self.rnn, self.z, action,
                                         self.rnn_states)

        # actual action in wrapped env:
        # Map the continuous scalar to move-left / move-right buttons
        # (indices 11 / 10) with a dead zone of +-0.3333.
        threshold = 0.3333
        full_action = [0] * 43

        if action < -threshold:
            full_action[11] = 1

        if action > threshold:
            full_action[10] = 1

        obs, reward, done, _ = super(DoomTakeCoverMDNRNN,
                                     self)._step(full_action)
        small_obs = self._process_frame(obs)
        self.current_obs = small_obs
        self.z = self._encode(small_obs)

        if done:
            self.restart = 1
        else:
            self.restart = 0

        if self.with_obs:
            return [self._current_state(), self.current_obs], reward, done, {}
        else:
            return self._current_state(), reward, done, {}

    def _encode(self, img):
        """VAE-encode one 64x64 RGB frame to its latent vector z."""
        # BUGFIX: np.float was removed in NumPy 1.24; the builtin float is
        # the exact alias it used to be, so behavior is unchanged.
        simple_obs = np.copy(img).astype(float) / 255.0
        simple_obs = simple_obs.reshape(1, 64, 64, 3)
        z = self.vae.encode(simple_obs)[0]
        return z

    def _reset(self):
        """Reset the wrapped env and all latent/RNN bookkeeping."""
        obs = super(DoomTakeCoverMDNRNN, self)._reset()
        small_obs = self._process_frame(obs)
        self.current_obs = small_obs
        self.rnn_states = rnn_init_state(self.rnn)
        self.z = self._encode(small_obs)
        self.restart = 1
        self.frame_count = 0

        if self.with_obs:
            return [self._current_state(), self.current_obs]
        else:
            return self._current_state()

    def _process_frame(self, frame):
        """Crop the HUD and downscale the frame to 64x64 RGB."""
        obs = frame[0:400, :, :]
        obs = Image.fromarray(obs, mode='RGB').resize((64, 64))
        obs = np.array(obs)
        return obs

    def _current_state(self):
        """Concatenate z with the flattened RNN state(s)."""
        if self.rnn.args.state_space == 2:
            return np.concatenate([
                self.z,
                tf.keras.backend.flatten(self.rnn_states[1]),
                tf.keras.backend.flatten(self.rnn_states[0])
            ],
                                  axis=0)  # cell then hidden fro some reason
        return np.concatenate(
            [self.z, tf.keras.backend.flatten(self.rnn_states[0])],
            axis=0)  # only the hidden state

    def _seed(self, seed=None):
        # NOTE: `if seed:` treats seed=0 the same as None.
        if seed:
            tf.random.set_seed(seed)
        self.np_random, seed = seeding.np_random(seed)
        return [seed]
Пример #27
0
class Model:
  """Multi-agent world-models controller: MDN-RNN features plus (optionally
  inferred) opponent intents feed a one-layer policy.
  """

  def __init__(self, arglist,action_space, scope, load_model=True):
    self.action_space = action_space
    self.arglist = arglist
    # self.vae = ConvVAE(batch_size=1, gpu_mode=False, is_training=False, reuse=True)

    # RNN input = own obs + own action + the other agents' action histories.
    hps_sample = hps_model._replace(batch_size=1,
                input_seq_width = arglist.obs_size+ arglist.action_space + (arglist.agent_num-1) * arglist.action_space * arglist.timestep,   
                max_seq_len=1, use_recurrent_dropout=0, is_training=0,
                obs_size = arglist.obs_size )

    self.rnn = MDNRNN(hps_sample, gpu_mode=False, reuse=True)

    if load_model:
      # self.vae.load_json(arglist.vae_model_dir)
      self.rnn.load_json(arglist.rnn_model_dir)

    self.state = self.rnn.rnn_init_state()
    self.rnn_mode = True
    print(arglist.inference)
    # With inference on, the controller sees one predicted intent per
    # opponent; with it off, it sees each opponent's full action history.
    if arglist.inference == True: 
      self.input_size = self.rnn.rnn_output_size(arglist.exp_mode) +(arglist.agent_num-1)  * arglist.action_space
    elif arglist.inference == False:
      self.input_size = self.rnn.rnn_output_size(arglist.exp_mode) + (arglist.timestep) *( arglist.agent_num-1) * arglist.action_space  
    # self.z_size = 32
    # action trajectories recording 
    # NOTE(review): the list-multiply shares ONE deque across all opponents —
    # confirm whether each opponent should get an independent history.
    self.act_traj = [collections.deque(np.zeros((arglist.timestep, arglist.action_space)), maxlen = arglist.timestep)] *(arglist.agent_num -1)
    # self.oppo_model = Oppo_Model(arglist.agent_num, arglist.timestep, arglist.action_space,arglist.action_space, "oppo_model_{}".format(scope) )
    self.inference = arglist.inference

    if arglist.exp_mode == MODE_Z_HIDDEN: # one hidden layer
      self.hidden_size = 40
      self.weight_hidden = np.random.randn(self.input_size, self.hidden_size)
      self.bias_hidden = np.random.randn(self.hidden_size)
      self.weight_output = np.random.randn(self.hidden_size, self.action_space)
      self.bias_output = np.random.randn(self.action_space)
      self.param_count = ((self.input_size+1)*self.hidden_size) + (self.hidden_size*self.action_space+self.action_space)
    else:
      self.weight = np.random.randn(self.input_size, self.action_space)
      self.bias = np.random.randn(self.action_space)
      self.param_count = (self.input_size)*self.action_space+self.action_space

  def reset(self):
    """Reset the RNN hidden state for a new episode."""
    self.state = self.rnn.rnn_init_state()
    # self.oppo_state = lstm_init_state(self.oppo_model)

  # def encode_obs(self, obs):
  #   # convert raw obs to z, mu, logvar
  #   result = np.copy(obs).astype(np.float)/255.0
  #   result = result.reshape(1, 64, 64, 3)
  #   mu, logvar = self.vae.encode_mu_logvar(result)
  #   mu = mu[0]
  #   logvar = logvar[0]
  #   s = logvar.shape
  #   z = mu + np.exp(logvar/2.0) * np.random.randn(*s)
  #   return z, mu, logvar

  def get_action(self, obs, act_traj):
    """Compute an action from RNN features (+ opponent intents), with
    epsilon-random exploration; advances the RNN state as a side effect."""
    h = self.rnn.rnn_output(self.state, obs, act_traj, self.arglist.exp_mode)

    if self.arglist.inference:
      oppo_intents = []
      for i in range(self.arglist.agent_num - 1):
        act_traj = self.act_traj[i]
        # intent = self.oppo_model .get_inference(act_traj)
        # Placeholder intent: the opponent model is currently disabled.
        intent = [0,0]
        oppo_intents.append(intent)
      oppo_intents = np.reshape(oppo_intents, ((self.arglist.agent_num-1 )* self.arglist.action_space))
      #Oppo intent shape (batch_size, agent_num, action_space)
      # reshape oppo_intent  agent_num * batch_size * action_space
     
      controller_input = np.concatenate((h, oppo_intents))
     
    else:
      controller_input =  h
    
    if self.arglist.exp_mode == MODE_Z_HIDDEN: # one hidden layer
      x = np.tanh(np.dot(controller_input, self.weight_hidden) + self.bias_hidden)
      action = np.tanh(np.dot(x, self.weight_output) + self.bias_output)
    else:
      action = np.tanh(np.dot(controller_input, self.weight) + self.bias)
    # clip() is a module-level helper not visible here.
    for i in range(self.action_space):
      action[i] = clip(action[i])
    
    self.state = self.rnn.rnn_next_state(obs, action, self.act_traj, self.state)
    # self.oppo_state = oppo_next_state(self.oppo_model, action, self.act_traj, self.oppo_state)

    # epsilon exploration
    # NOTE(review): the random action replaces every component with the SAME
    # uniform sample in [-3, 3], outside tanh's [-1, 1] range — confirm.
    if np.random.uniform(0,1) < 0.2:
      action = [np.random.uniform(-3,3)] * len(action)
    return action

  def set_model_params(self, model_params):
    """Unpack a flat parameter vector into the controller weights."""
    if self.arglist.exp_mode == MODE_Z_HIDDEN: # one hidden layer
      params = np.array(model_params)
      cut_off = (self.input_size+1)*self.hidden_size
      params_1 = params[:cut_off]
      params_2 = params[cut_off:]
      self.bias_hidden = params_1[:self.hidden_size]
      self.weight_hidden = params_1[self.hidden_size:].reshape(self.input_size, self.hidden_size)
      self.bias_output = params_2[:self.action_space]
      self.weight_output = params_2[self.action_space:].reshape(self.hidden_size, self.action_space)
    else:
      self.bias = np.array(model_params[:self.action_space])
      self.weight = np.array(model_params[self.action_space:]).reshape(self.input_size, self.action_space)

  def load_model(self, filename):
    """Load controller parameters from a JSON file and apply them."""
    with open(filename) as f:    
      data = json.load(f)
    print('loading file %s' % (filename))
    # self.data = data
    model_params = np.array(data[0]) # assuming other stuff is in data
    self.set_model_params(model_params)

  def get_random_model_params(self, stdev=0.1):
    """Heavy-tailed random parameter init (Cauchy, scaled by stdev)."""
    #return np.random.randn(self.param_count)*stdev
    return np.random.standard_cauchy(self.param_count)*stdev # spice things up

  def init_random_model_params(self, stdev=0.1):
    """Randomize controller and RNN parameters in one shot."""
    params = self.get_random_model_params(stdev=stdev)
    self.set_model_params(params)
    # vae_params = self.vae.get_random_model_params(stdev=stdev)
    # self.vae.set_model_params(vae_params)
    rnn_params = self.rnn.get_random_model_params(stdev=stdev)
    self.rnn.set_model_params(rnn_params)
Пример #28
0
class Model:
    ''' simple one layer model for car racing '''
    # Controller for the VisualPushBlock Unity env: VAE + MDN-RNN features
    # into a one-layer (or one-hidden-layer) tanh policy of size ACTION_SIZE.

    def __init__(self, load_model=True):
        self.env_name = './VisualPushBlock_withBlock_z_info.x86_64'  #'./VisualPushBlock.x86_64'
        self.vae = ConvVAE(batch_size=1,
                           gpu_mode=False,
                           is_training=False,
                           reuse=True)
        self.rnn = MDNRNN(hps_sample, gpu_mode=False, reuse=True)

        if load_model:
            self.vae.load_json('vae/vae.json')
            self.rnn.load_json('rnn/rnn.json')

        self.state = rnn_init_state(self.rnn)
        self.rnn_mode = True

        self.input_size = rnn_output_size(EXP_MODE)
        self.z_size = z_size

        if EXP_MODE == MODE_Z_HIDDEN:  # one hidden layer ###CHANGE is made here
            self.hidden_size = 40
            self.weight_hidden = np.random.randn(self.input_size,
                                                 self.hidden_size)
            self.bias_hidden = np.random.randn(self.hidden_size)
            self.weight_output = np.random.randn(self.hidden_size, ACTION_SIZE)
            self.bias_output = np.random.randn(ACTION_SIZE)
            self.param_count = ((self.input_size + 1) * self.hidden_size) + (
                self.hidden_size * ACTION_SIZE + ACTION_SIZE)
        else:
            self.weight = np.random.randn(self.input_size, ACTION_SIZE)
            self.bias = np.random.randn(ACTION_SIZE)
            self.param_count = (self.input_size) * ACTION_SIZE + ACTION_SIZE

        self.render_mode = False

    def make_env(self,
                 seed=-1,
                 render_mode=False,
                 full_episode=False,
                 worker_id=0):
        """Create the Unity environment (worker_id distinguishes instances)."""
        self.render_mode = render_mode
        self.env = make_env(self.env_name,
                            seed=seed,
                            render_mode=render_mode,
                            full_episode=full_episode,
                            worker_id=worker_id)

    def reset(self):
        """Reset the RNN hidden state for a new episode."""
        self.state = rnn_init_state(self.rnn)

    def encode_obs(self, obs):
        # convert raw obs to z, mu, logvar
        #result = np.copy(obs).astype(np.float)/255.0

        # Unity observations arrive already scaled, hence no /255 here.
        # NOTE(review): np.float was removed in NumPy 1.24 — needs builtin
        # float on modern NumPy.
        result = np.copy(obs).astype(np.float)
        result = result.reshape(1, IMAGE_W, IMAGE_H, 3)
        mu, logvar = self.vae.encode_mu_logvar(result)
        mu = mu[0]
        logvar = logvar[0]
        s = logvar.shape
        # Reparameterization trick: sample z ~ N(mu, exp(logvar)).
        z = mu + np.exp(logvar / 2.0) * np.random.randn(*s)
        return z, mu, logvar

    def get_action(self, z):
        """Continuous tanh action from the controller; advances RNN state."""
        h = rnn_output(self.state, z, EXP_MODE)
        #print('h', h.shape, h)
        '''
    action = np.dot(h, self.weight) + self.bias
    action[0] = np.tanh(action[0])
    action[1] = sigmoid(action[1])
    action[2] = clip(np.tanh(action[2]))
    '''
        if EXP_MODE == MODE_Z_HIDDEN:  # one hidden layer
            h = np.tanh(np.dot(h, self.weight_hidden) + self.bias_hidden)
            action = np.tanh(np.dot(h, self.weight_output) + self.bias_output)
        else:
            '''print(h.shape)
      print(self.weight.shape)
      print(self.bias.shape)'''
            action = np.tanh(np.dot(h, self.weight) + self.bias)
        '''for i in range(ACTION_SIZE):
      action[i] = (action[i]+1.0) / 2.0 #all actions value are in range 0 to 1'''
        #action[2] = clip(action[2])
        self.state = rnn_next_state(self.rnn, z, action,
                                    self.state)  #update weights of MDN-RNN
        return action

    def set_model_params(self, model_params):
        """Unpack a flat parameter vector into the controller weights."""
        if EXP_MODE == MODE_Z_HIDDEN:  # one hidden layer
            params = np.array(model_params)
            cut_off = (self.input_size + 1) * self.hidden_size
            params_1 = params[:cut_off]
            params_2 = params[cut_off:]
            self.bias_hidden = params_1[:self.hidden_size]
            self.weight_hidden = params_1[self.hidden_size:].reshape(
                self.input_size, self.hidden_size)
            self.bias_output = params_2[:ACTION_SIZE]
            self.weight_output = params_2[ACTION_SIZE:].reshape(
                self.hidden_size, ACTION_SIZE)
        else:
            self.bias = np.array(model_params[:ACTION_SIZE])
            self.weight = np.array(model_params[ACTION_SIZE:]).reshape(
                self.input_size, ACTION_SIZE)

    def load_model(self, filename):
        """Load controller parameters from a JSON file and apply them."""
        with open(filename) as f:
            data = json.load(f)
        print('loading file %s' % (filename))
        self.data = data
        model_params = np.array(data[0])  # assuming other stuff is in data
        self.set_model_params(model_params)

    def get_random_model_params(self, stdev=0.1):
        """Heavy-tailed random parameter init (Cauchy, scaled by stdev)."""
        #return np.random.randn(self.param_count)*stdev
        return np.random.standard_cauchy(
            self.param_count) * stdev  # spice things up

    def init_random_model_params(self, stdev=0.1):
        """Randomize controller, VAE, and RNN parameters in one shot."""
        params = self.get_random_model_params(stdev=stdev)
        self.set_model_params(params)
        vae_params = self.vae.get_random_model_params(stdev=stdev)
        self.vae.set_model_params(vae_params)
        rnn_params = self.rnn.get_random_model_params(stdev=stdev)
        self.rnn.set_model_params(rnn_params)
Пример #29
0
class Model:
    ''' simple one layer model for car racing '''
    # Discrete-action Atari variant (Pong-v0): argmax over a linear (or
    # one-hidden-layer ReLU) head, with epsilon-greedy exploration.

    def __init__(self, load_model=True, env_name="Pong-v0", render_mode=False):
        self.env_name = env_name
        self.make_env()
        self.z_size = 32

        self.vae = ConvVAE(batch_size=1,
                           gpu_mode=False,
                           is_training=False,
                           reuse=True)
        # RNN input = latent z plus a one-hot action of width self.na.
        hps_atari = hps_sample._replace(input_seq_width=self.z_size + self.na)
        self.rnn = MDNRNN(hps_atari, gpu_mode=False, reuse=True)

        if load_model:
            self.vae.load_json('vae/vae.json')
            self.rnn.load_json('rnn/rnn.json')

        self.state = rnn_init_state(self.rnn)
        self.rnn_mode = True

        self.input_size = rnn_output_size(EXP_MODE)
        self.init_controller()

        self.render_mode = False

    # INIT The Controller After the enviroment Creation.
    def make_env(self, seed=-1, render_mode=False):
        """Create the env and cache its discrete action count."""
        self.render_mode = render_mode
        self.env = make_env(self.env_name, seed=seed, render_mode=render_mode)
        self.na = self.env.action_space.n  # discrete by default.

    def init_controller(self):
        """Allocate controller weights once self.na is known."""
        if EXP_MODE == MODE_Z_HIDDEN:  # one hidden layer
            self.hidden_size = 40
            self.weight_hidden = np.random.randn(self.input_size,
                                                 self.hidden_size)
            self.bias_hidden = np.random.randn(self.hidden_size)
            self.weight_output = np.random.randn(
                self.hidden_size, self.na)  # pong. Modify later.
            self.bias_output = np.random.randn(self.na)
            self.param_count = (self.input_size + 1) * self.hidden_size + (
                self.hidden_size + 1) * self.na
        else:
            self.weight = np.random.randn(self.input_size, self.na)
            self.bias = np.random.randn(self.na)
            self.param_count = (self.input_size + 1) * self.na

    def reset(self):
        """Reset the RNN hidden state for a new episode."""
        self.state = rnn_init_state(self.rnn)

    def encode_obs(self, obs):
        # convert raw obs to z, mu, logvar
        # NOTE(review): np.float was removed in NumPy 1.24 — needs builtin
        # float on modern NumPy.
        result = np.copy(obs).astype(np.float) / 255.0
        result = result.reshape(1, 64, 64, 1)
        mu, logvar = self.vae.encode_mu_logvar(result)
        mu = mu[0]
        logvar = logvar[0]
        s = logvar.shape
        # Reparameterization trick: sample z ~ N(mu, exp(logvar)).
        z = mu + np.exp(logvar / 2.0) * np.random.randn(*s)
        return z, mu, logvar

    def get_action(self, z, epsilon=0.0):
        """Epsilon-greedy discrete action; advances the RNN state."""
        h = rnn_output(self.state, z, EXP_MODE)
        '''
    action = np.dot(h, self.weight) + self.bias
    action[0] = np.tanh(action[0])
    action[1] = sigmoid(action[1])
    action[2] = clip(np.tanh(action[2]))
    '''
        if np.random.rand() < epsilon:
            action = np.random.randint(0, self.na)
        else:
            if EXP_MODE == MODE_Z_HIDDEN:  # one hidden layer
                # ReLU hidden layer, then greedy argmax over action scores.
                h = np.maximum(
                    np.dot(h, self.weight_hidden) + self.bias_hidden, 0)
                action = np.argmax(
                    np.dot(h, self.weight_output) + self.bias_output)
            else:
                action = np.argmax(np.dot(h, self.weight) + self.bias)

        # One-hot encode the chosen action for the RNN's next-state input.
        oh_action = np.zeros(self.na)
        oh_action[action] = 1

        # action[1] = (action[1]+1.0) / 2.0
        # action[2] = clip(action[2])

        # TODO check about this fucntion
        self.state = rnn_next_state(self.rnn, z, oh_action, self.state)

        return action

    def set_model_params(self, model_params):
        """Unpack a flat parameter vector into the controller weights."""
        if EXP_MODE == MODE_Z_HIDDEN:  # one hidden layer
            params = np.array(model_params)
            cut_off = (self.input_size + 1) * self.hidden_size
            params_1 = params[:cut_off]
            params_2 = params[cut_off:]
            self.bias_hidden = params_1[:self.hidden_size]
            self.weight_hidden = params_1[self.hidden_size:].reshape(
                self.input_size, self.hidden_size)
            self.bias_output = params_2[:self.na]
            self.weight_output = params_2[self.na:].reshape(
                self.hidden_size, self.na)
        else:
            self.bias = np.array(model_params[:self.na])
            self.weight = np.array(model_params[self.na:]).reshape(
                self.input_size, self.na)

    def load_model(self, filename):
        """Load controller parameters from a JSON file and apply them."""
        with open(filename) as f:
            data = json.load(f)
        print('loading file %s' % (filename))
        self.data = data
        model_params = np.array(data[0])  # assuming other stuff is in data
        self.set_model_params(model_params)

    def get_random_model_params(self, stdev=0.1):
        """Heavy-tailed random parameter init (Cauchy, scaled by stdev)."""
        #return np.random.randn(self.param_count)*stdev
        return np.random.standard_cauchy(
            self.param_count) * stdev  # spice things up

    def init_random_model_params(self, stdev=0.1):
        """Randomize controller, VAE, and RNN parameters in one shot."""
        params = self.get_random_model_params(stdev=stdev)
        self.set_model_params(params)
        vae_params = self.vae.get_random_model_params(stdev=stdev)
        self.vae.set_model_params(vae_params)
        rnn_params = self.rnn.get_random_model_params(stdev=stdev)
        self.rnn.set_model_params(rnn_params)
Пример #30
0
class CarRacing:
    """Wrapper around the CarRacing-v0 environment that encodes raw frames
    with a pre-trained VAE + MDN-RNN world model, exposing the concatenated
    [rnn hidden output, latent z] vector as the agent-facing state.

    Parameters
    - type: Name of environment. Default is classic Car Racing game, but can
      be changed to introduce perturbations in environment (only used to
      build a unique run name here).
    - history_pick: Size of history (number of raw frames kept).
    - seed: List of seeds to sample from during training. Default is none
      (random games).
    - detect_edges / detect_grass / flip: preprocessing / augmentation flags.
    """

    def __init__(self, type="CarRacing", history_pick=4, seed=None,
                 detect_edges=False, detect_grass=False, flip=False):
        self.name = type + str(time.time())  # unique per-run identifier
        random.seed(30)
        self.env = make_env('CarRacing-v0', random.randint(1, 10000000),
                            render_mode=False, full_episode=True)
        self.image_dimension = [64, 64]
        self.history_pick = history_pick
        self.state_space_size = history_pick * np.prod(self.image_dimension)
        self.action_space_size = 5
        self.state_shape = [None, self.history_pick] + list(self.image_dimension)
        self.history = []
        # discrete action index -> continuous [steer, gas, brake] triple
        self.action_dict = {0: [-1, 0, 0], 1: [1, 0, 0], 2: [0, 1, 0],
                            3: [0, 0, 0.8], 4: [0, 0, 0]}
        self.seed = seed
        self.detect_edges = detect_edges
        self.detect_grass = detect_grass
        self.flip = flip
        self.flip_episode = False
        self.vae = ConvVAE(batch_size=1, gpu_mode=False, is_training=False, reuse=True)
        self.rnn = MDNRNN(hps_sample, gpu_mode=False, reuse=True)
        self.vae.load_json('vae/vae.json')
        self.rnn.load_json('rnn/rnn.json')

    def sample_action_space(self):
        """Return a random discrete action index."""
        return np.random.randint(self.action_space_size)

    def map_action(self, action):
        """Map a discrete action index to its continuous control triple.

        On flipped episodes the two steering actions (0 and 1) are mirrored.
        """
        if self.flip_episode and action <= 1:
            action = 1 - action
        return self.action_dict[action]

    def reset(self, test=False):
        """Reset environment and RNN state; return (encoded state, 1)."""
        self.state_rnn = rnn_init_state(self.rnn)
        if self.seed:
            self.env.seed(random.choice(self.seed))
        self.flip_episode = random.random() > 0.5 and not test and self.flip
        state, self.state_rnn = self.encode_obs(
            self.env.reset(), self.state_rnn, np.array([0.5, 0.2, 0.8]))
        return state, 1

    def step(self, action, test=False):
        """Apply a discrete action with frame-skip.

        The action is repeated for 2-4 frames during training (1 in test
        mode); returns (next encoded state, total reward, done, info, 1).
        """
        action = self.map_action(action)
        total_reward = 0
        n = 1 if test else random.choice([2, 3, 4])
        for i in range(n):
            next_state, reward, done, info = self.env.step(action)
            # fixed: 'self. state_rnn' -> 'self.state_rnn' (same attribute,
            # normalized spacing)
            next_state, self.state_rnn = self.encode_obs(next_state, self.state_rnn, action)
            total_reward += reward
            info = {'true_done': done}
            if done:
                break
        return next_state, total_reward, done, info, 1

    def render(self):
        """Render the underlying environment."""
        self.env.render()

    def process(self, state):
        """Append *state* to the history and return the zero-padded frame
        stack together with an in-grass flag."""
        self.add_history(state)
        in_grass = utils.in_grass(state)
        if len(self.history) < self.history_pick:
            # pad the front with zero frames until the history is full
            zeros = np.zeros(self.image_dimension)
            result = np.tile(zeros, ((self.history_pick - len(self.history)), 1, 1))
            result = np.concatenate((result, np.array(self.history)))
        else:
            result = np.array(self.history)
        return result, in_grass

    def add_history(self, state):
        """Keep at most ``history_pick`` recent raw frames (FIFO)."""
        if len(self.history) >= self.history_pick:
            self.history.pop(0)
        self.history.append(state)

    def __str__(self):
        # fixed: body was tab-indented while the class uses spaces, which
        # raises TabError under Python 3's strict indentation rules
        return self.name + '\nseed: {0}\nactions: {1}'.format(self.seed, self.action_dict)

    def encode_obs(self, obs, prev_state, action):
        """Encode a raw 64x64x3 frame into [h, z] and advance the RNN state.

        Returns (concatenated [h, z] vector, next RNN state).
        """
        # convert raw obs to z, mu, logvar
        # fixed: np.float was removed in NumPy 1.24 -> use the builtin float
        result = np.copy(obs).astype(float) / 255.0
        result = result.reshape(1, 64, 64, 3)
        mu, logvar = self.vae.encode_mu_logvar(result)
        mu = mu[0]
        logvar = logvar[0]
        s = logvar.shape
        # sample z ~ N(mu, exp(logvar)) via the reparameterization trick
        z = mu + np.exp(logvar / 2.0) * np.random.randn(*s)
        # NOTE(review): the literal 4 is presumably an experiment-mode flag
        # for rnn_output — confirm against its definition
        h = rnn_output(prev_state, z, 4)
        next_state = rnn_next_state(self.rnn, z, np.array(action), prev_state)
        return np.concatenate([h, z]), next_state