Example #1
    def __init__(self, arglist):
        self.env_name = arglist.game
        self.vae = ConvVAE(batch_size=1,
                           gpu_mode=False,
                           is_training=False,
                           reuse=True)
        self.vae.load_json(arglist.vae_file)
        self.rnn = MDNRNN(hps_sample, gpu_mode=False, reuse=True)
        self.rnn.load_json(arglist.rnn_file)
        self.state = rnn_init_state(self.rnn)
        self.rnn_mode = True

        self.input_size = rnn_output_size(EXP_MODE)
        self.z_size = 32

        if EXP_MODE == MODE_Z_HIDDEN:  # one hidden layer
            self.hidden_size = 40
            self.weight_hidden = np.random.randn(self.input_size,
                                                 self.hidden_size)
            self.bias_hidden = np.random.randn(self.hidden_size)
            self.weight_output = np.random.randn(self.hidden_size, 2)
            self.bias_output = np.random.randn(2)
            self.param_count = ((self.input_size + 1) *
                                self.hidden_size) + (self.hidden_size * 2 + 2)
        else:
            self.weight = np.random.randn(self.input_size, 2)
            self.bias = np.random.randn(2)
            self.param_count = (self.input_size) * 2 + 2

        self.render_mode = False
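
For context, the randomly initialized weights above form a tiny controller on top of the VAE latent and the MDN-RNN hidden state. A minimal sketch of how such a controller might produce an action, assuming the rnn_output and rnn_next_state helpers behave as in the World Models reference code (action post-processing omitted):

    def get_action(self, z):
        # Controller input: z concatenated with the RNN state, as selected by EXP_MODE.
        h = rnn_output(self.state, z, EXP_MODE)
        if EXP_MODE == MODE_Z_HIDDEN:
            # One hidden tanh layer, then a linear output layer.
            h = np.tanh(np.dot(h, self.weight_hidden) + self.bias_hidden)
            action = np.tanh(np.dot(h, self.weight_output) + self.bias_output)
        else:
            # Purely linear controller.
            action = np.tanh(np.dot(h, self.weight) + self.bias)
        # Advance the MDN-RNN one step with the chosen action.
        self.state = rnn_next_state(self.rnn, z, action, self.state)
        return action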
Example #2
    def __init__(self, load_model=True):
        self.env_name = './VisualPushBlock_withBlock_z_info.x86_64'  #'./VisualPushBlock.x86_64'
        self.vae = ConvVAE(batch_size=1,
                           gpu_mode=False,
                           is_training=False,
                           reuse=True)
        self.rnn = MDNRNN(hps_sample, gpu_mode=False, reuse=True)

        if load_model:
            self.vae.load_json('vae/vae.json')
            self.rnn.load_json('rnn/rnn.json')

        self.state = rnn_init_state(self.rnn)
        self.rnn_mode = True

        self.input_size = rnn_output_size(EXP_MODE)
        self.z_size = z_size

        if EXP_MODE == MODE_Z_HIDDEN:  # one hidden layer ###CHANGE is made here
            self.hidden_size = 40
            self.weight_hidden = np.random.randn(self.input_size,
                                                 self.hidden_size)
            self.bias_hidden = np.random.randn(self.hidden_size)
            self.weight_output = np.random.randn(self.hidden_size, ACTION_SIZE)
            self.bias_output = np.random.randn(ACTION_SIZE)
            self.param_count = ((self.input_size + 1) * self.hidden_size) + (
                self.hidden_size * ACTION_SIZE + ACTION_SIZE)
        else:
            self.weight = np.random.randn(self.input_size, ACTION_SIZE)
            self.bias = np.random.randn(ACTION_SIZE)
            self.param_count = (self.input_size) * ACTION_SIZE + ACTION_SIZE

        self.render_mode = False
Example #3
  def __init__(self):
    self.env_name = "carracing"
    self.vae = ConvVAE(batch_size=1, gpu_mode=False, is_training=False, reuse=True)
    self.vae.load_json('vae/vae.json')
    self.rnn = MDNRNN(hps_sample, gpu_mode=False, reuse=True)
    self.rnn.load_json('rnn/rnn.json')
    self.state = rnn_init_state(self.rnn)
    self.rnn_mode = True

    self.input_size = rnn_output_size(EXP_MODE)
    self.z_size = 32

    if EXP_MODE == MODE_Z_HIDDEN: # one hidden layer
      self.hidden_size = 40
      self.weight_hidden = np.random.randn(self.input_size, self.hidden_size)
      self.bias_hidden = np.random.randn(self.hidden_size)
      self.weight_output = np.random.randn(self.hidden_size, 3)
      self.bias_output = np.random.randn(3)
      self.param_count = ((self.input_size+1)*self.hidden_size) + (self.hidden_size*3+3)
    else:
      self.weight = np.random.randn(self.input_size, 3)
      self.bias = np.random.randn(3)
      self.param_count = (self.input_size)*3+3

    self.render_mode = False
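
The param_count computed above is the length of the flat parameter vector supplied by the evolution-strategy trainer. A minimal sketch of how that vector might be unpacked into the bias and weight arrays, assuming (as in the World Models reference code) that the bias entries come first:

  def set_model_params(self, model_params):
    params = np.array(model_params)
    if EXP_MODE == MODE_Z_HIDDEN:
      # First chunk: hidden-layer bias and weights; second chunk: output layer.
      cut_off = (self.input_size + 1) * self.hidden_size
      self.bias_hidden = params[:self.hidden_size]
      self.weight_hidden = params[self.hidden_size:cut_off].reshape(self.input_size, self.hidden_size)
      self.bias_output = params[cut_off:cut_off + 3]
      self.weight_output = params[cut_off + 3:].reshape(self.hidden_size, 3)
    else:
      self.bias = params[:3]
      self.weight = params[3:].reshape(self.input_size, 3)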
Example #4
  def __init__(self, load_model=True):
    # For Mac
    # self.env_name = "/Users/intuinno/codegit/pushBlock/app/mac/VisualPushBlockContinuous"
    # For linux
    self.env_name = "/home/intuinno/codegit/pushblock/app/linux/pushblock.x86_64"
    self.vae = ConvVAE(batch_size=1, gpu_mode=False, is_training=False, reuse=True)

    self.rnn = MDNRNN(hps_sample, gpu_mode=False, reuse=True)

    if load_model:
      self.vae.load_json('vae/vae.json')
      self.rnn.load_json('rnn/rnn.json')

    self.state = rnn_init_state(self.rnn)
    self.rnn_mode = True

    self.input_size = rnn_output_size(EXP_MODE)
    self.z_size = 32


    if EXP_MODE == MODE_Z_HIDDEN: # one hidden layer
      self.hidden_size = 40
      self.weight_hidden = np.random.randn(self.input_size, self.hidden_size)
      self.bias_hidden = np.random.randn(self.hidden_size)
      self.weight_output = np.random.randn(self.hidden_size, 3)
      self.bias_output = np.random.randn(3)
      self.param_count = ((self.input_size+1)*self.hidden_size) + (self.hidden_size*3+3)
    else:
      self.weight = np.random.randn(self.input_size, 3)
      self.bias = np.random.randn(3)
      self.param_count = (self.input_size)*3+3

    self.render_mode = False
Example #5
    def __init__(self,
                 args,
                 load_model=True,
                 full_episode=False,
                 with_obs=False):
        super(CarRacingMDNRNN, self).__init__(full_episode=full_episode)
        self.with_obs = with_obs  # whether or not to return the frame with the encodings
        self.vae = CVAE(args)
        self.rnn = MDNRNN(args)

        if load_model:
            self.vae.set_weights(
                tf.keras.models.load_model('results/{}/{}/tf_vae'.format(
                    args.exp_name, args.env_name),
                                           compile=False).get_weights())
            self.rnn.set_weights(
                tf.keras.models.load_model('results/{}/{}/tf_rnn'.format(
                    args.exp_name, args.env_name),
                                           compile=False).get_weights())

        self.rnn_states = rnn_init_state(self.rnn)

        self.full_episode = False
        self.observation_space = Box(low=np.NINF,
                                     high=np.Inf,
                                     shape=(32 + 256,))  # shape must be a tuple
Example #6
    def __init__(self,
                 args,
                 load_model=True,
                 full_episode=False,
                 with_obs=False):
        super(CarRacingMDNRNN, self).__init__(full_episode=full_episode)
        self.with_obs = with_obs  # whether or not to return the frame with the encodings
        self.vae = CVAE(args)
        self.rnn = MDNRNN(args)

        if load_model:
            self.vae.set_weights([
                param_i.numpy() for param_i in tf.saved_model.load(
                    'results/{}/tf_vae'.format(args.env_name)).variables
            ])
            self.rnn.set_weights([
                param_i.numpy() for param_i in tf.saved_model.load(
                    'results/{}/tf_rnn'.format(args.env_name)).variables
            ])
        self.rnn_states = rnn_init_state(self.rnn)

        self.full_episode = False
        self.observation_space = Box(low=np.NINF,
                                     high=np.Inf,
                                     shape=(args.z_size +
                                            args.rnn_size * args.state_space,))  # shape must be a tuple
Example #7
def reset(self, test=False):
    self.state_rnn = rnn_init_state(self.rnn)
    if self.seed:
        self.env.seed(random.choice(self.seed))
    self.flip_episode = random.random() > 0.5 and not test and self.flip
    state, self.state_rnn = self.encode_obs(self.env.reset(), self.state_rnn, np.array([0.5, 0.2, 0.8]))
    return state, 1
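
reset() above relies on an encode_obs helper that is not shown. A minimal sketch of what it might do, assuming the vae/rnn attributes from the other examples and the rnn_next_state helper; the exact feature layout is an assumption:

def encode_obs(self, obs, rnn_state, action):
    # Normalize the frame to [0, 1] and encode it to a latent vector z.
    frame = np.expand_dims(obs.astype(np.float32) / 255.0, axis=0)
    z = self.vae.encode(frame).flatten()
    # Controller input: z concatenated with the current RNN hidden state
    # (hypothetical layout; rnn_init_state typically returns an LSTM state tuple).
    state = np.concatenate([z, np.asarray(rnn_state.h).flatten()])
    # Advance the MDN-RNN one step with the action that produced this frame.
    rnn_state = rnn_next_state(self.rnn, z, action, rnn_state)
    return state, rnn_state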
Example #8
    def __init__(self, arglist, action_space, scope, load_model=True):
        self.action_space = action_space
        self.arglist = arglist
        self.vae = ConvVAE(batch_size=1,
                           gpu_mode=False,
                           is_training=False,
                           reuse=True)

        hps_sample = hps_model._replace(
            batch_size=1,
            input_seq_width=32 + arglist.action_space +
            (arglist.agent_num - 1) * arglist.action_space * arglist.timestep,
            max_seq_len=1,
            use_recurrent_dropout=0,
            is_training=0)

        self.rnn = MDNRNN(hps_sample, gpu_mode=False, reuse=True)

        if load_model:
            self.vae.load_json(arglist.vae_model_dir)
            self.rnn.load_json(arglist.rnn_model_dir)

        self.state = rnn_init_state(self.rnn)
        self.rnn_mode = True
        if arglist.inference:
            self.input_size = rnn_output_size(
                EXP_MODE) + (arglist.agent_num - 1) * arglist.action_space
        else:
            self.input_size = rnn_output_size(EXP_MODE)
        self.z_size = 32

        # action trajectories recording
        self.act_traj = [
            collections.deque(np.zeros(
                (arglist.timestep, arglist.action_space)),
                              maxlen=arglist.timestep)
        ] * (arglist.agent_num - 1)
        self.oppo_model = Oppo_Model(arglist.agent_num, arglist.timestep,
                                     arglist.action_space,
                                     arglist.action_space,
                                     "oppo_model_{}".format(scope))
        self.inference = arglist.inference

        if EXP_MODE == MODE_Z_HIDDEN:  # one hidden layer
            self.hidden_size = 40
            self.weight_hidden = np.random.randn(self.input_size,
                                                 self.hidden_size)
            self.bias_hidden = np.random.randn(self.hidden_size)
            self.weight_output = np.random.randn(self.hidden_size,
                                                 self.action_space)
            self.bias_output = np.random.randn(self.action_space)
            self.param_count = ((self.input_size + 1) * self.hidden_size) + (
                self.hidden_size * self.action_space + self.action_space)
        else:
            self.weight = np.random.randn(self.input_size, self.action_space)
            self.bias = np.random.randn(self.action_space)
            self.param_count = (
                self.input_size) * self.action_space + self.action_space
Example #9
def reset(self):
    self.rnn_states = rnn_init_state(self.rnn)
    if self.with_obs:
        [z_state, obs] = super(CarRacingMDNRNN, self).reset()  # calls step
        self.N_tiles = len(self.track)
        return [z_state, obs]
    else:
        z_state = super(CarRacingMDNRNN, self).reset()  # calls step
        self.N_tiles = len(self.track)
        return z_state
Example #10
    def _reset(self):
        obs = super(DoomTakeCoverMDNRNN, self)._reset()
        small_obs = self._process_frame(obs)
        self.current_obs = small_obs
        self.rnn_states = rnn_init_state(self.rnn)
        self.z = self._encode(small_obs)
        self.restart = 1
        self.frame_count = 0

        if self.with_obs:
            return [self._current_state(), self.current_obs]
        else:
            return self._current_state()
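
_current_state() above is not shown; a hypothetical sketch, assuming it concatenates the current latent z with the flattened MDN-RNN hidden state (attribute names are assumptions):

    def _current_state(self):
        # World-model features: z concatenated with the RNN hidden state.
        return np.concatenate([
            self.z,
            np.asarray(self.rnn_states.h).flatten(),
        ], axis=0)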
Example #11
    def reset(self):
        self.rnn_states = rnn_init_state(self.rnn)
        obs = super(CarRacingMDNRNN, self).reset()
        obs = self._process_frame(obs)
        z = self.encode_obs(obs)
        h = tf.squeeze(self.rnn_states[0])
        z_h = tf.concat([z, h], axis=-1)

        if self.with_obs:
            return [z_h, obs]
        else:
            z_h = super(CarRacingMDNRNN, self).reset()  # calls step
            return z_h
Example #12
  def __init__(self, load_model=True, full_episode=False):
    super(CarRacingMDNRNN, self).__init__(full_episode=full_episode)
    self.vae = CVAE(batch_size=1)
    self.rnn = MDNRNN(hps_sample)
     
    if load_model:
      self.vae.load_json('tf_vae/vae.json')
      self.rnn.load_json('tf_rnn/rnn.json')

    self.rnn_states = rnn_init_state(self.rnn)
    
    self.full_episode = False 
    self.observation_space = Box(low=np.NINF, high=np.Inf, shape=(32+256,))  # shape must be a tuple
Example #13
    def reset(self, **kwargs):
        # TODO: Is this zeroes? Can this just be None?
        self.rnn_state = rnn_init_state(self.rnn)  # [h, c]

        # reset() of wrapped environment might call step() and already set self.z in the process
        self.z = None
        obs = self.env.reset(**kwargs)
        # If step() was not called, calculate z from initial observation
        if self.z is None:
            self.z = self.encode_image_to_z(obs)

        obs = self.modify_observation(obs)

        return obs
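
modify_observation() in the snippet above is not shown; a hypothetical sketch, assuming it swaps the raw frame for the concatenated [z, h] world-model features (the [h, c] layout follows the comment in reset()):

    def modify_observation(self, obs):
        # Replace the raw observation with the world-model features.
        h, c = self.rnn_state  # [h, c], per the comment in reset()
        return np.concatenate([self.z, np.asarray(h).flatten()], axis=-1)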
Example #14
    def __init__(self,
                 index=0,
                 rl_training=False,
                 restore_model=False,
                 global_buffer=None,
                 net=None,
                 strategy_agent=None,
                 greedy_action=False,
                 extract_save_dir=None,
                 load_model=True,
                 ntype='worldmodel'):
        super(MiniSourceAgent, self).__init__()
        self.net = net
        self.index = index
        self.global_buffer = global_buffer
        self.restore_model = restore_model

        # model in brain
        self.strategy_agent = strategy_agent
        self.strategy_act = None

        # count num
        self.step = 0

        self.strategy_wait_secs = 2
        self.strategy_flag = False
        self.policy_wait_secs = 2
        self.policy_flag = True

        self.env = None
        self.obs = None

        # buffer
        self.local_buffer = Buffer()

        self.num_players = 2
        self.on_select = None
        self._result = None
        self._gases = None
        self.is_end = False

        self.greedy_action = greedy_action
        self.rl_training = rl_training

        self.extract_save_dir = extract_save_dir

        self.rnn_state = rnn_init_state(self.net.rnn)
Example #15
    def reset(self):
        super(MiniSourceAgent, self).reset()
        self.step = 0
        self.obs = None
        self._result = None
        self._gases = None
        self.is_end = False

        self.strategy_flag = False
        self.policy_flag = True

        self.local_buffer.reset()

        if self.strategy_agent is not None:
            self.strategy_agent.reset()

        self.rnn_state = rnn_init_state(self.net.rnn)
Example #16
    def __init__(self,
                 model_name='',
                 load_model=True,
                 load_full_model=False,
                 full_model_path=''):
        self.model_name = model_name
        self.env_name = "carracing"
        self.vae = ConvVAE(batch_size=1,
                           gpu_mode=False,
                           is_training=False,
                           reuse=True)

        self.rnn = MDNRNN(hps_sample, gpu_mode=False, reuse=True)

        if load_full_model:
            self.vae.load_json(os.path.join(full_model_path, 'vae.json'))
            self.rnn.load_json(os.path.join(full_model_path, 'rnn.json'))
        elif load_model:
            self.vae.load_json(
                os.path.join(vae_path, self.model_name + '_vae.json'))
            self.rnn.load_json(
                os.path.join(rnn_path, self.model_name + '_rnn.json'))

        self.state = rnn_init_state(self.rnn)
        self.rnn_mode = True

        self.input_size = rnn_output_size(EXP_MODE)
        self.z_size = 32

        if EXP_MODE == MODE_Z_HIDDEN:  # one hidden layer
            self.hidden_size = 40
            self.weight_hidden = np.random.randn(self.input_size,
                                                 self.hidden_size)
            self.bias_hidden = np.random.randn(self.hidden_size)
            self.weight_output = np.random.randn(self.hidden_size, 3)
            self.bias_output = np.random.randn(3)
            self.param_count = ((self.input_size + 1) *
                                self.hidden_size) + (self.hidden_size * 3 + 3)
        else:
            self.weight = np.random.randn(self.input_size, 3)
            self.bias = np.random.randn(3)
            self.param_count = (self.input_size) * 3 + 3

        self.render_mode = False
Example #17
    def __init__(self, load_model=True):
        self.env_name = "Pong"
        self._make_env()

        self.vae = ConvVAE(batch_size=1,
                           gpu_mode=False,
                           is_training=False,
                           reuse=True)

        hps_sample_dynamic = hps_sample._replace(num_actions=self.num_actions)
        self.rnn = MDNRNN(hps_sample_dynamic, gpu_mode=False, reuse=True)

        if load_model:
            self.vae.load_json('vae/vae.json')
            self.rnn.load_json('rnn/rnn.json')

        self.state = rnn_init_state(self.rnn)
        self.rnn_mode = True

        self.input_size = rnn_output_size(EXP_MODE)
        self.z_size = 32

        if EXP_MODE == MODE_Z_HIDDEN:  # one hidden layer
            raise Exception("not ported for atari")
            self.hidden_size = 40
            self.weight_hidden = np.random.randn(self.input_size,
                                                 self.hidden_size)
            self.bias_hidden = np.random.randn(self.hidden_size)
            self.weight_output = np.random.randn(self.hidden_size,
                                                 self.num_actions)
            self.bias_output = np.random.randn(self.num_actions)
            self.param_count = ((self.input_size + 1) * self.hidden_size) + (
                (self.hidden_size + 1) * self.num_actions)
        else:
            # TODO: Not known until env.action_space is queried...
            self.weight = np.random.randn(self.input_size, self.num_actions)
            self.bias = np.random.randn(self.num_actions)
            self.param_count = (self.input_size + 1) * self.num_actions

        self.render_mode = False
Example #18
    def __init__(self, load_model=True, env_name="Pong-v0", render_mode=False):
        self.env_name = env_name
        self.make_env()
        self.z_size = 32

        self.vae = ConvVAE(batch_size=1,
                           gpu_mode=False,
                           is_training=False,
                           reuse=True)
        hps_atari = hps_sample._replace(input_seq_width=self.z_size + self.na)
        self.rnn = MDNRNN(hps_atari, gpu_mode=False, reuse=True)

        if load_model:
            self.vae.load_json('vae/vae.json')
            self.rnn.load_json('rnn/rnn.json')

        self.state = rnn_init_state(self.rnn)
        self.rnn_mode = True

        self.input_size = rnn_output_size(EXP_MODE)
        self.init_controller()

        self.render_mode = False
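
init_controller() is not shown here; a minimal sketch, assuming it mirrors the linear-controller initialization used in the other examples, sized for this environment's self.na actions:

    def init_controller(self):
        # Linear controller from the [z, h] features to self.na action outputs.
        self.weight = np.random.randn(self.input_size, self.na)
        self.bias = np.random.randn(self.na)
        self.param_count = (self.input_size + 1) * self.na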
Example #19
def reset(self):
    self.rnn_state = rnn_init_state(self.rnn)
    self.z = self._sample_init_z()
    obs = OrderedDict(features=rnn_output(self.rnn_state, self.z, self.features_mode))
    return obs
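
_sample_init_z() above is not shown; a hypothetical sketch, assuming a table of initial latent means and log-variances collected from recorded episodes (initial_mu and initial_logvar are assumed names):

def _sample_init_z(self):
    # Pick a recorded starting point and sample z from its Gaussian.
    idx = np.random.randint(0, len(self.initial_mu))
    mu, logvar = self.initial_mu[idx], self.initial_logvar[idx]
    return mu + np.exp(logvar / 2.0) * np.random.randn(*mu.shape)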
Example #20
def reset(self):
  self.rnn_states = rnn_init_state(self.rnn)
  z_h = super(CarRacingWrapper, self).reset()  # calls step
  return z_h
Example #21
def reset(self):
    self.rnn_states = rnn_init_state(self.rnn)
    z = np.expand_dims(self._sample_init_z(), axis=0)
    self.o = z
    z_ch = tf.concat([z, self.rnn_states[1], self.rnn_states[0]], axis=-1)
    return tf.squeeze(z_ch)
Example #22
def train(sess, env, actor, critic, global_step):

    # Set up summary Ops
    summary_ops, summary_vars = build_summaries()

    sess.run(tf.global_variables_initializer())

    # load model if have
    saver = tf.train.Saver()
    checkpoint = tf.train.get_checkpoint_state("./results")

    if checkpoint and checkpoint.model_checkpoint_path:
        saver.restore(sess, checkpoint.model_checkpoint_path)
        print("Successfully loaded:", checkpoint.model_checkpoint_path)
        print("global step: ", global_step.eval())

    else:
        print("Could not find old network weights")

    writer = tf.summary.FileWriter(SUMMARY_DIR, sess.graph)

    # Initialize target network weights
    actor.update_target_network()
    critic.update_target_network()

    # Initialize replay memory
    replay_buffer = ReplayBuffer(BUFFER_SIZE, RANDOM_SEED)
    state = rnn_init_state(rnn)
    i = global_step.eval()
    eps = 1
    lr = INITIAL_LR
    while True:
        i += 1
        s = env.reset()
        s, state = encode_obs(s, state, np.array([0.5, 0.2, 0.8]))
        # s = prepro(s)
        ep_reward = 0
        ep_ave_max_q = 0
        eps *= EPS_DECAY_RATE
        lr *= LR_DECAY_RATE
        lr = np.max([lr, MINI_LR])  # minimum of learning rate is MINI_LR
        if i % SAVE_STEP == 0:  # save check point every 1000 episode
            sess.run(global_step.assign(i))
            save_path = saver.save(sess,
                                   "./results/model.ckpt",
                                   global_step=global_step)
            print("Model saved in file: %s" % save_path)
            print("Successfully saved global step: ", global_step.eval())

        for j in range(MAX_EP_STEPS):

            if RENDER_ENV:
                env.render()
            # print(s.shape)

            a = actor.predict(np.reshape(s, (-1, 16, 10, 2)))
            print(a)
            # action = a[0] + 1./(1+i+j) # add noise for exploration
            noise = np.random.normal(0, 0.2 * eps, 3)
            noise[1] = np.random.normal(0.4, 0.1 * eps)
            action = a[0] + noise
            s2, r, terminal, info = env.step(action)
            s2, state = encode_obs(s2, state, action)
            # s2 = prepro(s2)
            action = np.expand_dims(action, axis=0)
            # plt.imshow(s2)
            # plt.show()
            # if r > 0:
            #     r = 1
            # elif r < 0:
            #     r = -1
            # print 'r: ',r
            # replay_buffer.add(np.reshape(s, (96, 96, 3)), np.reshape(action, (actor.a_dim,)), r,
            #     terminal, np.reshape(s2, (96, 96, 3)),lr)
            replay_buffer.add(s, np.reshape(action, (actor.a_dim, )), r,
                              terminal, s2, lr)
            # Keep adding experience to the memory until
            # there are at least minibatch size samples
            if replay_buffer.size() > MINIBATCH_SIZE:
                s_batch, a_batch, r_batch, t_batch, s2_batch, lr_batch = \
                    replay_buffer.sample_batch(MINIBATCH_SIZE)

                # Calculate targets
                s2_batch = np.reshape(s2_batch, (64, 16, 10, 2))
                target_q = critic.predict_target(
                    s2_batch, actor.predict_target(s2_batch))

                y_i = []
                for k in range(MINIBATCH_SIZE):
                    if t_batch[k]:
                        y_i.append(r_batch[k])
                    else:
                        y_i.append(r_batch[k] + GAMMA * target_q[k])

                # Update the critic given the targets
                s_batch = np.reshape(s_batch, (64, 16, 10, 2))
                predicted_q_value, _ = critic.train(
                    s_batch, a_batch, np.reshape(y_i, (MINIBATCH_SIZE, 1)),
                    lr_batch)

                ep_ave_max_q += np.amax(predicted_q_value)
                # print ep_ave_max_q
                # Update the actor policy using the sampled gradient
                a_outs = actor.predict(s_batch)
                grads = critic.action_gradients(s_batch, a_outs)
                # print grads[0]
                actor.train(s_batch, grads[0], lr_batch)

                # Update target networks
                actor.update_target_network()
                critic.update_target_network()
                summary_str = sess.run(summary_ops,
                                       feed_dict={
                                           summary_vars[0]: ep_reward,
                                           summary_vars[1]:
                                           ep_ave_max_q / float(j)
                                       })

                writer.add_summary(summary_str, i)
                writer.flush()

                print('| Reward	: %.2i' % int(ep_reward), " | Episode", i, \
                    '| Qmax: %.4f' % (ep_ave_max_q / float(j)))

            s = s2
            ep_reward += r

            if terminal:

                # summary_str = sess.run(summary_ops, feed_dict={
                #     summary_vars[0]: ep_reward,
                #     summary_vars[1]: ep_ave_max_q / float(j)
                # })

                # writer.add_summary(summary_str, i)
                # writer.flush()

                # print '| Reward: %.2i' % int(ep_reward), " | Episode", i, \
                #     '| Qmax: %.4f' % (ep_ave_max_q / float(j))

                break
Example #23
def reset(self):
    self.state = rnn_init_state(self.rnn)
Example #24
vae.load_json(os.path.join('vae', 'vae.json'))


# Fourth, build the RNN
hps_atari_sample = hps_sample._replace(input_seq_width=z_size+na)
OUTWIDTH = hps_atari_sample.output_seq_width
rnn = MDNRNN(hps_atari_sample, gpu_mode=False)
rnn.load_json(os.path.join('rnn', 'rnn.json'))


print("All model loaded.")
# Fifth, run the evaluation. -> We have no predictions about the first frame.

start = time.time()

state = rnn_init_state(rnn) # initialize the state.
pz = None

for i in range(steps):

  ob = obs[i:i+1] # (1, 64, 64, 1)
  action = oh_actions[i:i+1] # (1, n)

  z = vae.encode(ob) # (1, 32) VAE done!
  rnn_z = np.expand_dims(z, axis=0) # (1, 1, 32)
  action = np.expand_dims(action, axis=0) # (1, 1, n)


  input_x = np.concatenate([rnn_z, action], axis=2) # (1, 1, 32+n)
  feed = {rnn.input_x: input_x, rnn.initial_state: state} # predict the next state and next z.