Example #1
class ModelMCTS(Model):
    def __init__(self, load_model=True):
        self.env_name = "carracing"
        self.env = make_env(self.env_name,
                            seed=SEED,
                            render_mode=render_mode,
                            full_episode=False)
        self.vae = ConvVAE(batch_size=1,
                           gpu_mode=False,
                           is_training=False,
                           reuse=True)

        self.rnn = MDNRNN(hps_sample, gpu_mode=False, reuse=True)

        if load_model:
            self.vae.load_json('../vae/vae.json')
            self.rnn.load_json('../rnn/rnn.json')

        self.state = rnn_init_state(self.rnn)
        self.rnn_mode = True

        self.input_size = rnn_output_size(EXP_MODE)
        self.z_size = 32

        if EXP_MODE == MODE_Z_HIDDEN:  # one hidden layer
            self.hidden_size = 40
            self.weight_hidden = np.random.randn(self.input_size,
                                                 self.hidden_size)
            self.bias_hidden = np.random.randn(self.hidden_size)
            self.weight_output = np.random.randn(self.hidden_size, 3)
            self.bias_output = np.random.randn(3)
            self.param_count = ((self.input_size + 1) *
                                self.hidden_size) + (self.hidden_size * 3 + 3)
        else:
            self.weight = np.random.randn(self.input_size, 3)
            self.bias = np.random.randn(3)
            self.param_count = (self.input_size) * 3 + 3

        self.render_mode = False
        self.mct = None

    def get_action(self, z):
        a = random_linear_sample(-1, 1)
        b = random_linear_sample(0, 1)
        c = random_linear_sample(0, 1)
        actions = dp(a, b, c)
        action, self.mct = mcts.mcts(z,
                                     self.env,
                                     actions,
                                     old_tree=self.mct,
                                     tree_depth=6,
                                     simulate_depth=200)

        self.state = rnn_next_state(self.rnn, z, action, self.state)

        return action
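A hedged usage sketch for the MCTS model above; the latent is produced with the model's own VAE, mirroring the encode_obs helpers in the later examples, and the rollout loop itself is an assumption:

# Sketch: plan with MCTS from the current latent, then act in the real environment.
model = ModelMCTS(load_model=True)
obs = model.env.reset()
done = False
while not done:
    frame = np.copy(obs).astype(np.float) / 255.0
    frame = frame.reshape(1, 64, 64, 3)
    mu, logvar = model.vae.encode_mu_logvar(frame)
    z = mu[0] + np.exp(logvar[0] / 2.0) * np.random.randn(*logvar[0].shape)
    action = model.get_action(z)  # runs mcts.mcts internally and advances the RNN state
    obs, reward, done, _ = model.env.step(action)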
Example #2
class CarRacingMDNRNN(CarRacingWrapper):
  def __init__(self, load_model=True, full_episode=False):
    super(CarRacingMDNRNN, self).__init__(full_episode=full_episode)
    self.vae = CVAE(batch_size=1)
    self.rnn = MDNRNN(hps_sample)
     
    if load_model:
      self.vae.load_json('tf_vae/vae.json')
      self.rnn.load_json('tf_rnn/rnn.json')

    self.rnn_states = rnn_init_state(self.rnn)
    
    self.full_episode = False 
    self.observation_space = Box(low=np.NINF, high=np.Inf, shape=(32+256,))

  def encode_obs(self, obs):
    # convert raw obs to z, mu, logvar
    result = np.copy(obs).astype(np.float)/255.0
    result = result.reshape(1, 64, 64, 3)
    mu, logvar = self.vae.encode_mu_logvar(result)
    mu = mu[0]
    logvar = logvar[0]
    s = logvar.shape
    z = mu + np.exp(logvar/2.0) * np.random.randn(*s)
    return z, mu, logvar

  def reset(self):
    self.rnn_states = rnn_init_state(self.rnn)
    z_h = super(CarRacingWrapper, self).reset() # calls step
    return z_h

  def _step(self, action):
    obs, reward, done, _ = super(CarRacingMDNRNN, self)._step(action)
    z, _, _ = self.encode_obs(obs)
    h = tf.squeeze(self.rnn_states[0])
    z_h = tf.concat([z, h], axis=-1)

    if action is not None: # don't compute state on reset
        self.rnn_states = rnn_next_state(self.rnn, z, action, self.rnn_states)
    return z_h, reward, done, {}
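A minimal rollout sketch for the wrapper above; the random action values are placeholders and only illustrate that _step returns the concatenated z (32) and h (256) vector:

# Sketch: drive the MDN-RNN observation wrapper with random actions.
env = CarRacingMDNRNN(load_model=True)
z_h = env.reset()
for _ in range(10):
    action = np.array([np.random.uniform(-1, 1),  # steer
                       np.random.uniform(0, 1),   # gas
                       np.random.uniform(0, 1)])  # brake
    z_h, reward, done, _ = env._step(action)      # step() may dispatch here depending on the gym version
    if done:
        break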
Example #3
def sample_vae2(args):
    """ For vae from https://github.com/hardmaru/WorldModelsExperiments.git
    """
    z_size = 32
    batch_size = args.count
    learning_rate = 0.0001
    kl_tolerance = 0.5
    model_path_name = "tf_vae"

    reset_graph()
    vae = ConvVAE(
        z_size=z_size,
        batch_size=batch_size,
        learning_rate=learning_rate,
        kl_tolerance=kl_tolerance,
        is_training=False,
        reuse=False,
        gpu_mode=False)  # use GPU on batchsize of 1000 -> much faster

    vae.load_json(os.path.join(model_path_name, 'vae.json'))

    z = np.random.normal(size=(args.count, z_size))
    samples = vae.decode(z)
    input_dim = samples.shape[1:]

    n = args.count
    plt.figure(figsize=(20, 4))
    plt.title('VAE samples')
    for i in range(n):
        ax = plt.subplot(2, n, i + 1)
        plt.imshow(samples[i].reshape(input_dim[0], input_dim[1],
                                      input_dim[2]))
        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)

    #plt.savefig( image_path )
    plt.show()
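A hedged invocation sketch; sample_vae2 only reads args.count here, so a simple namespace stands in for the parsed arguments:

# Sketch: call sample_vae2 with a minimal stand-in for argparse args.
from types import SimpleNamespace

args = SimpleNamespace(count=8)  # number of latent vectors to sample and plot
sample_vae2(args)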
Example #4
class CarRacing:

    # Parameters
    # - type: Name of environment. Default is classic Car Racing game, but can be changed to introduce perturbations in environment
    # - history_pick: Size of history
    # - seed: List of seeds to sample from during training. Default is none (random games)
    def __init__(self, type="CarRacing", history_pick=4, seed=None, detect_edges=False, detect_grass=False, flip=False):
        self.name = type + str(time.time())
        random.seed(30)
        self.env = make_env('CarRacing-v0', random.randint(1,10000000), render_mode = False, full_episode = True)
        self.image_dimension = [64,64]
        self.history_pick = history_pick
        self.state_space_size = history_pick * np.prod(self.image_dimension)
        self.action_space_size = 5
        self.state_shape = [None, self.history_pick] + list(self.image_dimension)
        self.history = []
        self.action_dict = {0: [-1, 0, 0], 1: [1, 0, 0], 2: [0, 1, 0], 3: [0, 0, 0.8], 4: [0, 0, 0]}
        self.seed = seed
        self.detect_edges = detect_edges
        self.detect_grass = detect_grass
        self.flip = flip
        self.flip_episode = False
        self.vae = ConvVAE(batch_size=1, gpu_mode=False, is_training=False, reuse=True)
        self.rnn = MDNRNN(hps_sample, gpu_mode=False, reuse=True)
        self.vae.load_json('vae/vae.json')
        self.rnn.load_json('rnn/rnn.json')

    # returns a random action
    def sample_action_space(self):
        return np.random.randint(self.action_space_size)

    def map_action(self, action):
        if self.flip_episode and action <= 1:
            action = 1 - action
        return self.action_dict[action]

    # resets the environment and returns the initial state
    def reset(self, test=False):
        self.state_rnn = rnn_init_state(self.rnn)
        if self.seed:
            self.env.seed(random.choice(self.seed))
        self.flip_episode = random.random() > 0.5 and not test and self.flip
        state, self.state_rnn = self.encode_obs(self.env.reset(), self.state_rnn, np.array([0.5, 0.2, 0.8]))
        return state, 1

    # take action 
    def step(self, action, test=False):
        action = self.map_action(action)
        total_reward = 0
        n = 1 if test else random.choice([2, 3, 4])
        for i in range(n):
            next_state, reward, done, info = self.env.step(action)
            next_state, self.state_rnn = self.encode_obs(next_state, self.state_rnn, action)
            total_reward += reward
            info = {'true_done': done}
            if done: break   
        return next_state, total_reward, done, info, 1

    def render(self):
        self.env.render()

    # process state and return the current history
    def process(self, state):
        self.add_history(state)
        in_grass = utils.in_grass(state)
        if len(self.history) < self.history_pick:
            zeros = np.zeros(self.image_dimension)
            result = np.tile(zeros, ((self.history_pick - len(self.history)), 1, 1))
            result = np.concatenate((result, np.array(self.history)))
        else:
            result = np.array(self.history)
        return result, in_grass

    def add_history(self, state):
        if len(self.history) >= self.history_pick:
            self.history.pop(0)
        #temp = utils.process_image(state, detect_edges=self.detect_edges, flip=self.flip_episode)
        self.history.append(state)

    def __str__(self):
        return self.name + '\nseed: {0}\nactions: {1}'.format(self.seed, self.action_dict)

    def encode_obs(self, obs, prev_state, action):
        # convert raw obs to z, mu, logvar
        result = np.copy(obs).astype(np.float)/255.0
        result = result.reshape(1, 64, 64, 3)
        mu, logvar = self.vae.encode_mu_logvar(result)
        mu = mu[0]
        logvar = logvar[0]
        s = logvar.shape
        z = mu + np.exp(logvar/2.0) * np.random.randn(*s)
        h = rnn_output(prev_state, z, 4)
        next_state = rnn_next_state(self.rnn, z, np.array(action), prev_state)
        return np.concatenate([h, z]), next_state
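A short episode sketch against the discrete-action wrapper above, using its own sample_action_space as a random policy:

# Sketch: run one episode with random discrete actions.
env = CarRacing(history_pick=4)
state, _ = env.reset(test=True)
done = False
total_reward = 0.0
while not done:
    action = env.sample_action_space()                      # 0..4, mapped by map_action
    state, step_reward, done, info, _ = env.step(action, test=True)
    total_reward += step_reward
print('episode reward:', total_reward)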
Example #5
class Model:
    ''' simple one layer model for car racing '''
    def __init__(self, load_model=True, env_name="Pong-v0", render_mode=False):
        self.env_name = env_name
        self.make_env()
        self.z_size = 32

        self.vae = ConvVAE(batch_size=1,
                           gpu_mode=False,
                           is_training=False,
                           reuse=True)
        hps_atari = hps_sample._replace(input_seq_width=self.z_size + self.na)
        self.rnn = MDNRNN(hps_atari, gpu_mode=False, reuse=True)

        if load_model:
            self.vae.load_json('vae/vae.json')
            self.rnn.load_json('rnn/rnn.json')

        self.state = rnn_init_state(self.rnn)
        self.rnn_mode = True

        self.input_size = rnn_output_size(EXP_MODE)
        self.init_controller()

        self.render_mode = False

    # Initialize the controller after the environment is created.
    def make_env(self, seed=-1, render_mode=False):
        self.render_mode = render_mode
        self.env = make_env(self.env_name, seed=seed, render_mode=render_mode)
        self.na = self.env.action_space.n  # discrete by default.

    def init_controller(self):
        if EXP_MODE == MODE_Z_HIDDEN:  # one hidden layer
            self.hidden_size = 40
            self.weight_hidden = np.random.randn(self.input_size,
                                                 self.hidden_size)
            self.bias_hidden = np.random.randn(self.hidden_size)
            self.weight_output = np.random.randn(
                self.hidden_size, self.na)  # pong. Modify later.
            self.bias_output = np.random.randn(self.na)
            self.param_count = (self.input_size + 1) * self.hidden_size + (
                self.hidden_size + 1) * self.na
        else:
            self.weight = np.random.randn(self.input_size, self.na)
            self.bias = np.random.randn(self.na)
            self.param_count = (self.input_size + 1) * self.na

    def reset(self):
        self.state = rnn_init_state(self.rnn)

    def encode_obs(self, obs):
        # convert raw obs to z, mu, logvar
        result = np.copy(obs).astype(np.float) / 255.0
        result = result.reshape(1, 64, 64, 1)
        mu, logvar = self.vae.encode_mu_logvar(result)
        mu = mu[0]
        logvar = logvar[0]
        s = logvar.shape
        z = mu + np.exp(logvar / 2.0) * np.random.randn(*s)
        return z, mu, logvar

    def get_action(self, z, epsilon=0.0):
        h = rnn_output(self.state, z, EXP_MODE)
        '''
    action = np.dot(h, self.weight) + self.bias
    action[0] = np.tanh(action[0])
    action[1] = sigmoid(action[1])
    action[2] = clip(np.tanh(action[2]))
    '''
        if np.random.rand() < epsilon:
            action = np.random.randint(0, self.na)
        else:
            if EXP_MODE == MODE_Z_HIDDEN:  # one hidden layer
                h = np.maximum(
                    np.dot(h, self.weight_hidden) + self.bias_hidden, 0)
                action = np.argmax(
                    np.dot(h, self.weight_output) + self.bias_output)
            else:
                action = np.argmax(np.dot(h, self.weight) + self.bias)

        oh_action = np.zeros(self.na)
        oh_action[action] = 1

        # action[1] = (action[1]+1.0) / 2.0
        # action[2] = clip(action[2])

        # TODO: check this function
        self.state = rnn_next_state(self.rnn, z, oh_action, self.state)

        return action

    def set_model_params(self, model_params):
        if EXP_MODE == MODE_Z_HIDDEN:  # one hidden layer
            params = np.array(model_params)
            cut_off = (self.input_size + 1) * self.hidden_size
            params_1 = params[:cut_off]
            params_2 = params[cut_off:]
            self.bias_hidden = params_1[:self.hidden_size]
            self.weight_hidden = params_1[self.hidden_size:].reshape(
                self.input_size, self.hidden_size)
            self.bias_output = params_2[:self.na]
            self.weight_output = params_2[self.na:].reshape(
                self.hidden_size, self.na)
        else:
            self.bias = np.array(model_params[:self.na])
            self.weight = np.array(model_params[self.na:]).reshape(
                self.input_size, self.na)

    def load_model(self, filename):
        with open(filename) as f:
            data = json.load(f)
        print('loading file %s' % (filename))
        self.data = data
        model_params = np.array(data[0])  # assuming other stuff is in data
        self.set_model_params(model_params)

    def get_random_model_params(self, stdev=0.1):
        #return np.random.randn(self.param_count)*stdev
        return np.random.standard_cauchy(
            self.param_count) * stdev  # spice things up

    def init_random_model_params(self, stdev=0.1):
        params = self.get_random_model_params(stdev=stdev)
        self.set_model_params(params)
        vae_params = self.vae.get_random_model_params(stdev=stdev)
        self.vae.set_model_params(vae_params)
        rnn_params = self.rnn.get_random_model_params(stdev=stdev)
        self.rnn.set_model_params(rnn_params)
Example #6
class Model:
    ''' simple one layer model for car racing '''
    def __init__(self, load_model=True):

        self.vae = ConvVAE(batch_size=1,
                           gpu_mode=False,
                           is_training=False,
                           reuse=True)

        self.rnn = MDNRNN(hps_sample, gpu_mode=False, reuse=True)

        if load_model:
            self.vae.load_json('vae/vae.json')
            self.rnn.load_json('rnn/rnn.json')

        self.state = rnn_init_state(self.rnn)
        self.rnn_mode = True

        self.input_size = rnn_output_size(EXP_MODE)
        self.z_size = 32

        if EXP_MODE == MODE_Z_HIDDEN:  # one hidden layer
            self.hidden_size = 40
            self.weight_hidden = np.random.randn(self.input_size,
                                                 self.hidden_size)
            self.bias_hidden = np.random.randn(self.hidden_size)
            self.weight_output = np.random.randn(self.hidden_size, 2)
            self.bias_output = np.random.randn(2)
            self.param_count = ((self.input_size + 1) *
                                self.hidden_size) + (self.hidden_size * 2 + 2)
        else:
            self.weight = np.random.randn(self.input_size, 2)
            self.bias = np.random.randn(2)
            self.param_count = (self.input_size) * 2 + 2

    def reset(self):
        self.state = rnn_init_state(self.rnn)

    def encode_obs(self, obs):
        # convert raw obs to z, mu, logvar
        result = np.copy(obs).astype(np.float) / 255.0
        result = result.reshape(1, 64, 64, 3)
        mu, logvar = self.vae.encode_mu_logvar(result)
        mu = mu[0]
        logvar = logvar[0]
        s = logvar.shape
        z = mu + np.exp(logvar / 2.0) * np.random.randn(*s)
        return z, mu, logvar

    def get_action(self, z):
        h = rnn_output(self.state, z, EXP_MODE)
        '''
    action = np.dot(h, self.weight) + self.bias
    action[0] = np.tanh(action[0])
    action[1] = sigmoid(action[1])
    action[2] = clip(np.tanh(action[2]))
    '''
        if EXP_MODE == MODE_Z_HIDDEN:  # one hidden layer
            h = np.tanh(np.dot(h, self.weight_hidden) + self.bias_hidden)
            action = np.tanh(np.dot(h, self.weight_output) + self.bias_output)
        else:
            action = np.tanh(np.dot(h, self.weight) + self.bias)
        action[0] = clip(action[0])
        action[1] = clip(action[1])
        self.state = rnn_next_state(self.rnn, z, action, self.state)
        if np.random.uniform(0, 1) < 0.2:
            action = [np.random.uniform(-2, 2), np.random.uniform(-2, 2)]
        return action

    def set_model_params(self, model_params):
        if EXP_MODE == MODE_Z_HIDDEN:  # one hidden layer
            params = np.array(model_params)
            cut_off = (self.input_size + 1) * self.hidden_size
            params_1 = params[:cut_off]
            params_2 = params[cut_off:]
            self.bias_hidden = params_1[:self.hidden_size]
            self.weight_hidden = params_1[self.hidden_size:].reshape(
                self.input_size, self.hidden_size)
            self.bias_output = params_2[:2]
            self.weight_output = params_2[2:].reshape(self.hidden_size, 2)
        else:
            self.bias = np.array(model_params[:2])
            self.weight = np.array(model_params[2:]).reshape(
                self.input_size, 2)

    def load_model(self, filename):
        with open(filename) as f:
            data = json.load(f)
        print('loading file %s' % (filename))
        self.data = data
        model_params = np.array(data[0])  # assuming other stuff is in data
        self.set_model_params(model_params)

    def get_random_model_params(self, stdev=0.1):
        #return np.random.randn(self.param_count)*stdev
        return np.random.standard_cauchy(
            self.param_count) * stdev  # spice things up

    def init_random_model_params(self, stdev=0.1):
        params = self.get_random_model_params(stdev=stdev)
        self.set_model_params(params)
        vae_params = self.vae.get_random_model_params(stdev=stdev)
        self.vae.set_model_params(vae_params)
        rnn_params = self.rnn.get_random_model_params(stdev=stdev)
        self.rnn.set_model_params(rnn_params)
Example #7
class Model:
  ''' simple one layer model for car racing '''
  def __init__(self):
    self.env_name = "carracing"
    self.vae = ConvVAE(batch_size=1, gpu_mode=False, is_training=False, reuse=True)
    self.vae.load_json('vae/vae.json')
    self.rnn = MDNRNN(hps_sample, gpu_mode=False, reuse=True)
    self.rnn.load_json('rnn/rnn.json')
    self.state = rnn_init_state(self.rnn)
    self.rnn_mode = True

    self.input_size = rnn_output_size(EXP_MODE)
    self.z_size = 32

    if EXP_MODE == MODE_Z_HIDDEN: # one hidden layer
      self.hidden_size = 40
      self.weight_hidden = np.random.randn(self.input_size, self.hidden_size)
      self.bias_hidden = np.random.randn(self.hidden_size)
      self.weight_output = np.random.randn(self.hidden_size, 3)
      self.bias_output = np.random.randn(3)
      self.param_count = ((self.input_size+1)*self.hidden_size) + (self.hidden_size*3+3)
    else:
      self.weight = np.random.randn(self.input_size, 3)
      self.bias = np.random.randn(3)
      self.param_count = (self.input_size)*3+3

    self.render_mode = False

  def make_env(self, seed=-1, render_mode=False):
    self.render_mode = render_mode
    self.env = make_env(self.env_name, seed=seed, render_mode=render_mode)

  def reset(self):
    self.state = rnn_init_state(self.rnn)

  def encode_obs(self, obs):
    # convert raw obs to z, mu, logvar
    result = np.copy(obs).astype(np.float)/255.0
    result = result.reshape(1, 64, 64, 3)
    mu, logvar = self.vae.encode_mu_logvar(result)
    mu = mu[0]
    logvar = logvar[0]
    s = logvar.shape
    z = mu + np.exp(logvar/2.0) * np.random.randn(*s)
    return z, mu, logvar

  def decode_obs(self, z):
    # decode the latent vector
    img = self.vae.decode(z.reshape(1, self.z_size)) * 255.
    img = np.round(img).astype(np.uint8)
    img = img.reshape(64, 64, 3)
    return img

  def get_action(self, z):
    h = rnn_output(self.state, z, EXP_MODE)

    '''
    action = np.dot(h, self.weight) + self.bias
    action[0] = np.tanh(action[0])
    action[1] = sigmoid(action[1])
    action[2] = clip(np.tanh(action[2]))
    '''
    if EXP_MODE == MODE_Z_HIDDEN: # one hidden layer
      h = np.tanh(np.dot(h, self.weight_hidden) + self.bias_hidden)
      action = np.tanh(np.dot(h, self.weight_output) + self.bias_output)
    else:
      action = np.tanh(np.dot(h, self.weight) + self.bias)
    
    action[1] = (action[1]+1.0) / 2.0
    action[2] = clip(action[2])

    self.state = rnn_next_state(self.rnn, z, action, self.state)

    return action

  def set_model_params(self, model_params):
    if EXP_MODE == MODE_Z_HIDDEN: # one hidden layer
      params = np.array(model_params)
      cut_off = (self.input_size+1)*self.hidden_size
      params_1 = params[:cut_off]
      params_2 = params[cut_off:]
      self.bias_hidden = params_1[:self.hidden_size]
      self.weight_hidden = params_1[self.hidden_size:].reshape(self.input_size, self.hidden_size)
      self.bias_output = params_2[:3]
      self.weight_output = params_2[3:].reshape(self.hidden_size, 3)
    else:
      self.bias = np.array(model_params[:3])
      self.weight = np.array(model_params[3:]).reshape(self.input_size, 3)

  def load_model(self, filename):
    with open(filename) as f:    
      data = json.load(f)
    print('loading file %s' % (filename))
    self.data = data
    model_params = np.array(data[0]) # assuming other stuff is in data
    self.set_model_params(model_params)

  def get_random_model_params(self, stdev=0.1):
    return np.random.randn(self.param_count)*stdev
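A minimal evaluation sketch wiring the pieces above together (frame -> VAE latent -> controller action); seeding and rendering are left at their defaults, and the reward loop is an assumption:

# Sketch: evaluate the controller above for one episode.
model = Model()
model.make_env(seed=0)
model.reset()
obs = model.env.reset()
done = False
total_reward = 0.0
while not done:
    z, mu, logvar = model.encode_obs(obs)  # 64x64x3 frame -> 32-d latent sample
    action = model.get_action(z)           # also advances the MDN-RNN hidden state
    obs, reward, done, info = model.env.step(action)
    total_reward += reward
print('episode reward:', total_reward)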
Example #8
def sample_vae2(args):
    """ For vae from https://github.com/hardmaru/WorldModelsExperiments.git
    """
    z_size = 64  # This needs to match the size of the trained vae
    batch_size = args.count
    learning_rate = 0.0001
    kl_tolerance = 0.5
    model_path_name = "tf_vae"

    reset_graph()
    vae = ConvVAE(
        z_size=z_size,
        batch_size=batch_size,
        learning_rate=learning_rate,
        kl_tolerance=kl_tolerance,
        is_training=False,
        reuse=False,
        gpu_mode=False)  # use GPU on batchsize of 1000 -> much faster

    vae.load_json(os.path.join(model_path_name, 'vae.json'))

    z = np.random.normal(size=(args.count, z_size))
    samples = vae.decode(z)
    input_dim = samples.shape[1:]

    gen = DriveDataGenerator(args.dirs,
                             image_size=(64, 64),
                             batch_size=args.count,
                             shuffle=True,
                             max_load=10000,
                             images_only=True)
    orig = gen[0].astype(np.float) / 255.0
    #mu, logvar = vae.encode_mu_logvar(orig)
    #recon = vae.decode( mu )
    recon = vae.decode(vae.encode(orig))

    n = args.count
    plt.figure(figsize=(20, 6), tight_layout=False)
    plt.title('VAE samples')
    for i in range(n):
        ax = plt.subplot(3, n, i + 1)
        plt.imshow(samples[i].reshape(input_dim[0], input_dim[1],
                                      input_dim[2]))
        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)
        if 0 == i:
            ax.set_title("Random")

    for i in range(n):
        ax = plt.subplot(3, n, n + i + 1)
        plt.imshow(orig[i].reshape(input_dim[0], input_dim[1], input_dim[2]))
        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)
        if 0 == i:
            ax.set_title("Real")

        ax = plt.subplot(3, n, (2 * n) + i + 1)
        plt.imshow(recon[i].reshape(input_dim[0], input_dim[1], input_dim[2]))
        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)
        if 0 == i:
            ax.set_title("Reconstructed")

    plt.savefig("samples_vae.png")
    plt.show()
Example #9
filelist.sort()
filelist = filelist[0:NUM_DATA]

dataset, action_dataset = load_raw_data_list(filelist)

reset_graph()

vae = ConvVAE(z_size=z_size,
              batch_size=batch_size,
              learning_rate=learning_rate,
              kl_tolerance=kl_tolerance,
              is_training=False,
              reuse=False,
              gpu_mode=True)  # use GPU on batchsize of 1000 -> much faster

vae.load_json(os.path.join(model_path_name, args.name + '_vae.json'))

mu_dataset = []
logvar_dataset = []
for i in range(len(dataset)):
    data_batch = dataset[i]
    mu, logvar, z = encode_batch(data_batch)
    mu_dataset.append(mu.astype(np.float16))
    logvar_dataset.append(logvar.astype(np.float16))
    if ((i + 1) % 100 == 0):
        print(i + 1)

action_dataset = np.array(action_dataset)
mu_dataset = np.array(mu_dataset)
logvar_dataset = np.array(logvar_dataset)
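A hedged sketch of how the encoded arrays might be written out for later MDN-RNN training; the series filename is an assumption, not part of the snippet above:

# Sketch: persist the encoded dataset (illustrative output path).
np.savez_compressed(os.path.join(model_path_name, 'series.npz'),
                    action=action_dataset,
                    mu=mu_dataset,
                    logvar=logvar_dataset)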
Example #10
class VAERacingStack(CarRacing):
    def __init__(self, full_episode=False, discrete_mode=False):
        super(VAERacingStack, self).__init__()
        self._internal_counter = 0
        self.z_size = games['vae_racing_stack'].input_size
        self.vae = ConvVAE(batch_size=1,
                           z_size=self.z_size,
                           num_channel=FRAME_STACK,
                           gpu_mode=False,
                           is_training=False,
                           reuse=True)
        self.vae.load_json('vae/vae_stack_' + str(FRAME_STACK) + '.json')
        self.full_episode = full_episode
        high = np.array([np.inf] * self.z_size)
        self.observation_space = Box(-high, high)
        self.cumulative_frames = None
        self._has_rendered = False
        self.discrete_mode = discrete_mode

    def _get_image(self, z, cumulative_frames):
        large_img = np.zeros((64 * 2, 64 * FRAME_STACK))

        # decode the latent vector
        if z is not None:
            img = self.vae.decode(z.reshape(1, self.z_size)) * 255.0
            img = np.round(img).astype(np.uint8)
            img = img.reshape(64, 64, FRAME_STACK)
            for i in range(FRAME_STACK):
                large_img[64:, i * 64:(i + 1) * 64] = img[:, :, i]

        if len(cumulative_frames) == FRAME_STACK:
            for i in range(FRAME_STACK):
                large_img[:64, i * 64:(i + 1) * 64] = cumulative_frames[i]

        large_img = large_img.astype(np.uint8)

        return large_img

    def _reset(self):
        self._internal_counter = 0
        self.cumulative_frames = None
        self._has_rendered = False
        return super(VAERacingStack, self)._reset()

    def _render(self, mode='human', close=False):
        if mode == 'human' or mode == 'rgb_array':
            self._has_rendered = True
        return super(VAERacingStack, self)._render(mode=mode, close=close)

    def _step(self, action):

        if not self._has_rendered:
            self._render("rgb_array")
            self._has_rendered = False

        if action is not None:
            if not self.discrete_mode:
                action[0] = _clip(action[0], lo=-1.0, hi=+1.0)
                action[1] = _clip(action[1], lo=-1.0, hi=+1.0)
                action[1] = (action[1] + 1.0) / 2.0
                action[2] = _clip(action[2])
            else:
                '''
        in discrete setting:
        if action[0] is the highest, then agent does nothing
        if action[1] is the highest, then agent hits the pedal
        if -action[1] is the highest, then agent hits the brakes
        if action[2] is the highest, then agent turns left
        if action[3] is the highest, then agent turns right
        '''
                logits = [
                    _clip((action[0] + 1.0), hi=+2.0),
                    _clip(action[1]),
                    _clip(-action[1]),
                    _clip(action[2]),
                    _clip(-action[2])
                ]
                probs = softmax(logits)
                #chosen_action = np.argmax(logits)
                chosen_action = sample(probs)

                a = np.array([0.0, 0.0, 0.0])

                if chosen_action == 1: a[1] = +1.0  # up
                if chosen_action == 2:
                    a[2] = +0.8  # down: 0.8 as recommended by the environment's built-in demo
                if chosen_action == 3: a[0] = -1.0  # left
                if chosen_action == 4: a[0] = +1.0  # right

                action = a
                #print("chosen_action", chosen_action, action)

        obs, reward, done, _ = super(VAERacingStack, self)._step(action)

        if self.cumulative_frames is not None:
            self.cumulative_frames.pop(0)
            self.cumulative_frames.append(_process_frame_green(obs))
        else:
            self.cumulative_frames = [_process_frame_green(obs)] * FRAME_STACK

        self.z = z = _compress_frames(self.cumulative_frames, self.vae)

        if self.full_episode:
            return z, reward, False, {}

        self._internal_counter += 1
        if self._internal_counter > TIME_LIMIT:
            done = True

        #img = self._get_image(self.z, self.cumulative_frames)
        #imageio.imwrite('dump/'+('%0*d' % (4, self._internal_counter))+'.png', img)

        return z, reward, done, {}
Example #11
class VAERacingWorld(CarRacing):
    def __init__(self, full_episode=False, pure_world=False):
        super(VAERacingWorld, self).__init__()
        self._internal_counter = 0
        self.z_size = games['vae_racing'].input_size
        self.vae = ConvVAE(batch_size=1,
                           z_size=self.z_size,
                           gpu_mode=False,
                           is_training=False,
                           reuse=True)
        self.vae.load_json('vae/vae_' + str(self.z_size) + '.json')
        self.full_episode = full_episode
        if pure_world:
            high = np.array([np.inf] * 10)
        else:
            high = np.array([np.inf] * (self.z_size + 10))
        self.observation_space = Box(-high, high)
        self._has_rendered = False
        self.real_frame = None
        self.world_model = SimpleWorldModel(obs_size=16,
                                            action_size=3,
                                            hidden_size=10)
        world_model_path = "./log/learn_vae_racing.cma.4.64.best.json"
        self.world_model.load_model(world_model_path)
        self.pure_world_mode = pure_world

    def _reset(self):
        self._internal_counter = 0
        self._has_rendered = False
        self.real_frame = None
        return super(VAERacingWorld, self)._reset()

    def _render(self, mode='human', close=False):
        if mode == 'human' or mode == 'rgb_array':
            self._has_rendered = True
        return super(VAERacingWorld, self)._render(mode=mode, close=close)

    def _step(self, action):

        if not self._has_rendered:
            self._render("rgb_array")
            self._has_rendered = False

        old_action = [0, 0, 0]

        if action is not None:
            old_action = np.copy(action)
            action[0] = _clip(action[0], lo=-1.0, hi=+1.0)
            action[1] = _clip(action[1], lo=-1.0, hi=+1.0)
            action[1] = (action[1] + 1.0) / 2.0
            action[2] = _clip(action[2])

        obs, reward, done, _ = super(VAERacingWorld, self)._step(action)

        result = np.copy(_process_frame(obs)).astype(np.float) / 255.0
        result = result.reshape(1, 64, 64, 3)
        self.real_frame = result

        #z = self.vae.encode(result).flatten()
        mu, logvar = self.vae.encode_mu_logvar(result)
        mu = mu[0]
        logvar = logvar[0]
        s = logvar.shape
        z = mu + np.exp(logvar / 2.0) * np.random.randn(*s)

        if self.full_episode:
            if MU_MODE:
                return mu, reward, False, {}
            else:
                return z, reward, False, {}

        self._internal_counter += 1
        if self._internal_counter > TIME_LIMIT:
            done = True

        if MU_MODE:
            z = mu

        self.world_model.predict_next_obs(z, old_action)

        if self.pure_world_mode:
            z = np.copy(self.world_model.hidden_state)
        else:
            z = np.concatenate([z, self.world_model.hidden_state], axis=0)

        return z, reward, done, {}
Example #12
File: model.py Project: hdilab/pushblock
class Model:
  ''' simple one layer model for car racing '''
  def __init__(self, load_model=True):
    # For Mac
    # self.env_name = "/Users/intuinno/codegit/pushBlock/app/mac/VisualPushBlockContinuous"
    # For linux
    self.env_name = "/home/intuinno/codegit/pushblock/app/linux/pushblock.x86_64"
    self.vae = ConvVAE(batch_size=1, gpu_mode=False, is_training=False, reuse=True)

    self.rnn = MDNRNN(hps_sample, gpu_mode=False, reuse=True)

    if load_model:
      self.vae.load_json('vae/vae.json')
      self.rnn.load_json('rnn/rnn.json')

    self.state = rnn_init_state(self.rnn)
    self.rnn_mode = True

    self.input_size = rnn_output_size(EXP_MODE)
    self.z_size = 32


    if EXP_MODE == MODE_Z_HIDDEN: # one hidden layer
      self.hidden_size = 40
      self.weight_hidden = np.random.randn(self.input_size, self.hidden_size)
      self.bias_hidden = np.random.randn(self.hidden_size)
      self.weight_output = np.random.randn(self.hidden_size, 3)
      self.bias_output = np.random.randn(3)
      self.param_count = ((self.input_size+1)*self.hidden_size) + (self.hidden_size*3+3)
    else:
      self.weight = np.random.randn(self.input_size, 3)
      self.bias = np.random.randn(3)
      self.param_count = (self.input_size)*3+3

    self.render_mode = False

  def make_env(self, seed=-1, render_mode=False, full_episode=False, workerid=1):
    self.render_mode = render_mode
    self.env = make_env(self.env_name, seed=seed, render_mode=render_mode, full_episode=full_episode, workerid=workerid)

  def reset(self):
    self.state = rnn_init_state(self.rnn)

  def encode_obs(self, obs):
    # convert raw obs to z, mu, logvar
    result = np.copy(obs).astype(np.float)/255.0
    result = result.reshape(1, 64, 64, 3)
    mu, logvar = self.vae.encode_mu_logvar(result)
    mu = mu[0]
    logvar = logvar[0]
    s = logvar.shape
    z = mu + np.exp(logvar/2.0) * np.random.randn(*s)
    return z, mu, logvar

  def get_action(self, z):
    h = rnn_output(self.state, z, EXP_MODE)

    '''
    action = np.dot(h, self.weight) + self.bias
    action[0] = np.tanh(action[0])
    action[1] = sigmoid(action[1])
    action[2] = clip(np.tanh(action[2]))
    '''
    if EXP_MODE == MODE_Z_HIDDEN: # one hidden layer
      h = np.tanh(np.dot(h, self.weight_hidden) + self.bias_hidden)
      action = np.tanh(np.dot(h, self.weight_output) + self.bias_output)
    else:
      action = np.tanh(np.dot(h, self.weight) + self.bias)

    self.state = rnn_next_state(self.rnn, z, action, self.state)

    return action

  def set_model_params(self, model_params):
    if EXP_MODE == MODE_Z_HIDDEN: # one hidden layer
      params = np.array(model_params)
      cut_off = (self.input_size+1)*self.hidden_size
      params_1 = params[:cut_off]
      params_2 = params[cut_off:]
      self.bias_hidden = params_1[:self.hidden_size]
      self.weight_hidden = params_1[self.hidden_size:].reshape(self.input_size, self.hidden_size)
      self.bias_output = params_2[:3]
      self.weight_output = params_2[3:].reshape(self.hidden_size, 3)
    else:
      self.bias = np.array(model_params[:3])
      self.weight = np.array(model_params[3:]).reshape(self.input_size, 3)

  def load_model(self, filename):
    with open(filename) as f:    
      data = json.load(f)
    print('loading file %s' % (filename))
    self.data = data
    model_params = np.array(data[0]) # assuming other stuff is in data
    self.set_model_params(model_params)

  def get_random_model_params(self, stdev=0.1):
    #return np.random.randn(self.param_count)*stdev
    return np.random.standard_cauchy(self.param_count)*stdev # spice things up

  def init_random_model_params(self, stdev=0.1):
    params = self.get_random_model_params(stdev=stdev)
    self.set_model_params(params)
    vae_params = self.vae.get_random_model_params(stdev=stdev)
    self.vae.set_model_params(vae_params)
    rnn_params = self.rnn.get_random_model_params(stdev=stdev)
    self.rnn.set_model_params(rnn_params)
Example #13

if not os.path.exists(output_dir):
    os.mkdir(output_dir)

np.set_printoptions(precision=4, edgeitems=6, linewidth=100, suppress=True)
reset_graph()

# Third, Build the VAE
vae = ConvVAE(z_size=z_size,
              batch_size=1,
              is_training=False,
              reuse=False,
              gpu_mode=False)

vae.load_json(os.path.join('vae', 'vae.json'))


# Fourth, build the RNN
hps_atari_sample = hps_sample._replace(input_seq_width=z_size+na)
OUTWIDTH = hps_atari_sample.output_seq_width
rnn = MDNRNN(hps_atari_sample, gpu_mode=False)
rnn.load_json(os.path.join('rnn', 'rnn.json'))


print("All model loaded.")
# Fifth, run the evaluation. -> We have no predictions about the first frame.

start = time.time()

state = rnn_init_state(rnn) # initialize the state.
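A hedged continuation sketch of the evaluation loop that the snippet above sets up; env, encode_obs, and the placeholder policy are assumptions borrowed from the Model classes in the other examples:

# Sketch: step the MDN-RNN alongside the real environment and time it.
obs = env.reset()
for step in range(1000):
    z, mu, logvar = encode_obs(obs)   # VAE latent for the current frame
    action = np.random.randint(na)    # placeholder policy
    oh_action = np.zeros(na)
    oh_action[action] = 1.0
    state = rnn_next_state(rnn, z, oh_action, state)
    obs, reward, done, _ = env.step(action)
    if done:
        break
print('evaluation took %.2f seconds' % (time.time() - start))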
Example #14
class DQN:
    """
    DNN agent for Active Inference
    The architecture consists of a P model and an A model

    P model (Perception): the pretrained VAE encoder
    A model (Action): a DQN that treats expected free energy like a Q value

    """

    def __init__(self,
            env,
            batchsize=64,
            input_size=(64,64),
            num_frame_stack=4,
            gamma=0.95,
            frame_skip=1,
            train_freq=4,
            initial_epsilon=1.0,
            min_epsilon=0.1,
            render=True,
            epsilon_decay_steps=int(1e6),
            min_experience_size=int(1e3),
            experience_capacity=int(1e5),
            network_update_freq=5000,
            regularization=1e-6,
            optimizer_params=None,
            action_map=None
        ):

        self.vae = ConvVAE(batch_size=batchsize, gpu_mode=False, is_training=False, reuse=True)
        self.vae.load_json('vae/vae.json')

        if action_map is not None:
            self.dim_actions = len(action_map)
        else:
            self.dim_actions = env.action_space.n

        self.network_update_freq = network_update_freq
        self.action_map = action_map
        self.env = env
        self.batchsize = batchsize
        self.num_frame_stack = num_frame_stack
        self.gamma = gamma
        self.frame_skip = frame_skip
        self.train_freq = train_freq
        self.initial_epsilon = initial_epsilon
        self.min_epsilon = min_epsilon
        self.epsilon_decay_steps = epsilon_decay_steps
        self.render = render
        self.min_experience_size = min_experience_size
        self.input_size = input_size
        self.regularization = regularization
        self.optimizer_params = optimizer_params or dict(learning_rate=0.0004, epsilon=1e-7)
        self.do_training = True
        self.playing_epsilon = 0.0
        self.session = None
        self.state_size = (self.num_frame_stack,) + self.input_size
        self.global_counter = 0
        self.episode_counter = 0

    def build_graph(self):
        input_dim_with_batch = (self.batchsize, self.num_frame_stack) + self.input_size
        input_dim_general = (None, self.num_frame_stack) + self.input_size

        self.input_prev_state = tf.placeholder(tf.float32, input_dim_general, "prev_state")
        self.input_next_state = tf.placeholder(tf.float32, input_dim_with_batch, "next_state")
        self.input_reward = tf.placeholder(tf.float32, self.batchsize, "reward")
        self.input_actions = tf.placeholder(tf.int32, self.batchsize, "actions")
        self.input_done_mask = tf.placeholder(tf.int32, self.batchsize, "done_mask")

        # The target Q-values
        with tf.variable_scope("fixed"):
            qsa_targets = self.create_network(self.input_next_state, trainable=False)

        # The estimate Q-values
        with tf.variable_scope("train"):
            qsa_estimates = self.create_network(self.input_prev_state, trainable=True)

        self.best_action = tf.argmax(qsa_estimates, axis=1)

        not_done = tf.cast(tf.logical_not(tf.cast(self.input_done_mask, "bool")), "float32")
        q_target = tf.reduce_max(qsa_targets, -1) * self.gamma * not_done + self.input_reward

        action_slice = tf.stack([tf.range(0, self.batchsize), self.input_actions], axis=1)
        q_estimates_for_input_action = tf.gather_nd(qsa_estimates, action_slice)

        training_loss = tf.nn.l2_loss(q_target - q_estimates_for_input_action) / self.batchsize

        optimizer = tf.train.AdamOptimizer(**(self.optimizer_params))

        reg_loss = tf.add_n(tf.losses.get_regularization_losses())
        self.train_op = optimizer.minimize(reg_loss + training_loss)

        train_params = self.get_variables("train")
        fixed_params = self.get_variables("fixed")

        # test
        assert (len(train_params)) == len(fixed_params)
        self.copy_network_ops = [tf.assign(fixed_v, train_v) for train_v, fixed_v in zip(train_params, fixed_params)]

    ### Create estimate Q-network and target Q-network
    def create_network(self, input, trainable):
        if trainable:
            wr = slim.l2_regularizer(self.regularization)
        else:
            wr = None

        input_t = tf.transpose(input, [0, 2, 3, 1])
        net = slim.conv2d(input_t, 8, (7, 7), data_format="NHWC", activation_fn=tf.nn.relu, stride=3, weights_regularizer=wr, trainable=trainable)
        net = slim.max_pool2d(net, 2, 2)
        net = slim.conv2d(net, 16, (3, 3), data_format="NHWC", activation_fn=tf.nn.relu, weights_regularizer=wr, trainable=trainable)
        net = slim.max_pool2d(net, 2, 2)
        net = slim.flatten(net)
        net = slim.fully_connected(net, 256, activation_fn=tf.nn.relu, weights_regularizer=wr, trainable=trainable)
        q_state_action_values = slim.fully_connected(net, self.dim_actions, activation_fn=None, weights_regularizer=wr, trainable=trainable)

        return q_state_action_values

    def get_random_action(self):
        return np.random.choice(self.dim_actions)

    def get_epsilon(self):
        if not self.do_training:
            return self.playing_epsilon
        elif self.global_counter >= self.epsilon_decay_steps:
            return self.min_epsilon
        else:
            # linear decay
            r = 1.0 - self.global_counter / float(self.epsilon_decay_steps)
            return self.min_epsilon + (self.initial_epsilon - self.min_epsilon) * r

    ### Training operation with data from Replay Memory
    def train(self):
        batch = self.exp_history.sample_mini_batch(self.batchsize)

        fd = {
            self.input_reward: "reward",
            self.input_prev_state: "prev_state",
            self.input_next_state: "next_state",
            self.input_actions: "actions",
            self.input_done_mask: "done_mask"
        }
        fd1 = {ph: batch[k] for ph, k in fd.items()}
        self.session.run([self.train_op], fd1)

    def play_episode(self):
        # Replay Memory
        eh = (
            self.exp_history if self.do_training
            else self.playing_cache
        )
        total_reward = 0
        frames_in_episode = 0

        # Start environment
        first_frame = self.env.reset()
        first_frame_pp = self.process_image(first_frame)

        eh.start_new_episode(first_frame_pp)

        while True:
            if np.random.rand() > self.get_epsilon():
                action_idx = self.session.run(
                    self.best_action,
                    {self.input_prev_state: eh.current_state()[np.newaxis, ...]}
                )[0]
            else:
                action_idx = self.get_random_action()

            if self.action_map is not None:
                action = self.action_map[action_idx]
            else:
                action = action_idx

            reward = 0
            for _ in range(self.frame_skip):
                observation, r, done, info = self.env.step(action)
                if self.render:
                    self.env.render()
                reward += r
                if done:
                    break

            early_done, punishment = self.check_early_stop(reward, total_reward)
            if early_done:
                reward += punishment

            done = done or early_done

            total_reward += reward
            frames_in_episode += 1

            eh.add_experience(self.process_image(observation), action_idx, done, reward)

            if self.do_training:
                self.global_counter += 1
                if self.global_counter % self.network_update_freq == 0:
                    self.update_target_network()
                train_cond = (
                    self.exp_history.counter >= self.min_experience_size and
                    self.global_counter % self.train_freq == 0
                )
                if train_cond:
                    self.train()

            if done:
                if self.do_training:
                    self.episode_counter += 1

                return total_reward, frames_in_episode
Example #15
class MiniNetwork(object):
    def __init__(self,
                 sess=None,
                 summary_writer=tf.summary.FileWriter("logs/"),
                 rl_training=False,
                 reuse=False,
                 cluster=None,
                 index=0,
                 device='/gpu:0',
                 ppo_load_path=None,
                 ppo_save_path=None,
                 load_worldmodel=True,
                 ntype='worldmodel'):
        self.policy_model_path_load = ppo_load_path + ntype
        self.policy_model_path_save = ppo_save_path + ntype

        self.rl_training = rl_training

        self.use_norm = True

        self.reuse = reuse
        self.sess = sess
        self.cluster = cluster
        self.index = index
        self.device = device

        self.vae = ConvVAE(batch_size=1,
                           gpu_mode=False,
                           is_training=False,
                           reuse=True)
        self.rnn = MDNRNN(hps_sample, gpu_mode=False, reuse=True)

        if load_worldmodel:
            self.vae.load_json('vae/vae.json')
            self.rnn.load_json('rnn/rnn.json')

        self.input_size = rnn_output_size(EXP_MODE)

        self._create_graph()

        self.rl_saver = tf.train.Saver()
        self.summary_writer = summary_writer

    def initialize(self):
        init_op = tf.global_variables_initializer()
        self.sess.run(init_op)

    def reset_old_network(self):
        self.policy_ppo.assign_policy_parameters()
        self.policy_ppo.reset_mean_returns()

        self.sess.run(self.results_sum.assign(0))
        self.sess.run(self.game_num.assign(0))

    def _create_graph(self):
        if self.reuse:
            tf.get_variable_scope().reuse_variables()
            assert tf.get_variable_scope().reuse

        worker_device = "/job:worker/task:%d" % self.index + self.device
        with tf.device(
                tf.train.replica_device_setter(worker_device=worker_device,
                                               cluster=self.cluster)):
            self.results_sum = tf.get_variable(
                name="results_sum", shape=[], initializer=tf.zeros_initializer)
            self.game_num = tf.get_variable(name="game_num",
                                            shape=[],
                                            initializer=tf.zeros_initializer)

            self.global_steps = tf.get_variable(
                name="global_steps",
                shape=[],
                initializer=tf.zeros_initializer)
            self.win_rate = self.results_sum / self.game_num

            self.mean_win_rate = tf.summary.scalar(
                'mean_win_rate_dis', self.results_sum / self.game_num)
            self.merged = tf.summary.merge([self.mean_win_rate])

            mini_scope = "MiniPolicyNN"
            with tf.variable_scope(mini_scope):
                ob_space = self.input_size
                act_space_array = _SIZE_MINI_ACTIONS
                self.policy = Policy_net('policy', self.sess, ob_space,
                                         act_space_array)
                self.policy_old = Policy_net('old_policy', self.sess, ob_space,
                                             act_space_array)
                self.policy_ppo = PPOTrain('PPO',
                                           self.sess,
                                           self.policy,
                                           self.policy_old,
                                           lr=P.mini_lr,
                                           epoch_num=P.mini_epoch_num)
            var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
            self.policy_saver = tf.train.Saver(var_list=var_list)

    def Update_result(self, result_list):
        win = 0
        for i in result_list:
            if i > 0:
                win += 1
        self.sess.run(self.results_sum.assign_add(win))
        self.sess.run(self.game_num.assign_add(len(result_list)))

    def Update_summary(self, counter):
        print("Update summary........")

        policy_summary = self.policy_ppo.get_summary_dis()
        self.summary_writer.add_summary(policy_summary, counter)

        summary = self.sess.run(self.merged)
        self.summary_writer.add_summary(summary, counter)
        self.sess.run(self.global_steps.assign(counter))

        print("Update summary finished!")

        steps = int(self.sess.run(self.global_steps))
        win_game = int(self.sess.run(self.results_sum))
        all_game = int(self.sess.run(self.game_num))
        win_rate = win_game / float(all_game)

        return steps, win_rate

    def get_win_rate(self):
        return float(self.sess.run(self.win_rate))

    def Update_policy(self, buffer):
        self.policy_ppo.ppo_train_dis(buffer.observations,
                                      buffer.tech_actions,
                                      buffer.rewards,
                                      buffer.values,
                                      buffer.values_next,
                                      buffer.gaes,
                                      buffer.returns,
                                      verbose=False)

    def get_global_steps(self):
        return int(self.sess.run(self.global_steps))

    def save_policy(self):
        self.policy_saver.save(self.sess, self.policy_model_path_save)
        print("policy has been saved in", self.policy_model_path_save)

    def restore_policy(self):
        self.policy_saver.restore(self.sess, self.policy_model_path_load)
        print("Restore policy from", self.policy_model_path_load)
Example #16
class Model:
    ''' simple one layer model for car racing '''
    def __init__(self, load_model=True):
        self.env_name = './VisualPushBlock_withBlock_z_info.x86_64'  #'./VisualPushBlock.x86_64'
        self.vae = ConvVAE(batch_size=1,
                           gpu_mode=False,
                           is_training=False,
                           reuse=True)
        self.rnn = MDNRNN(hps_sample, gpu_mode=False, reuse=True)

        if load_model:
            self.vae.load_json('vae/vae.json')
            self.rnn.load_json('rnn/rnn.json')

        self.state = rnn_init_state(self.rnn)
        self.rnn_mode = True

        self.input_size = rnn_output_size(EXP_MODE)
        self.z_size = z_size

        if EXP_MODE == MODE_Z_HIDDEN:  # one hidden layer ###CHANGE is made here
            self.hidden_size = 40
            self.weight_hidden = np.random.randn(self.input_size,
                                                 self.hidden_size)
            self.bias_hidden = np.random.randn(self.hidden_size)
            self.weight_output = np.random.randn(self.hidden_size, ACTION_SIZE)
            self.bias_output = np.random.randn(ACTION_SIZE)
            self.param_count = ((self.input_size + 1) * self.hidden_size) + (
                self.hidden_size * ACTION_SIZE + ACTION_SIZE)
        else:
            self.weight = np.random.randn(self.input_size, ACTION_SIZE)
            self.bias = np.random.randn(ACTION_SIZE)
            self.param_count = (self.input_size) * ACTION_SIZE + ACTION_SIZE

        self.render_mode = False

    def make_env(self,
                 seed=-1,
                 render_mode=False,
                 full_episode=False,
                 worker_id=0):
        self.render_mode = render_mode
        self.env = make_env(self.env_name,
                            seed=seed,
                            render_mode=render_mode,
                            full_episode=full_episode,
                            worker_id=worker_id)

    def reset(self):
        self.state = rnn_init_state(self.rnn)

    def encode_obs(self, obs):
        # convert raw obs to z, mu, logvar
        #result = np.copy(obs).astype(np.float)/255.0

        result = np.copy(obs).astype(np.float)
        result = result.reshape(1, IMAGE_W, IMAGE_H, 3)
        mu, logvar = self.vae.encode_mu_logvar(result)
        mu = mu[0]
        logvar = logvar[0]
        s = logvar.shape
        z = mu + np.exp(logvar / 2.0) * np.random.randn(*s)
        return z, mu, logvar

    def get_action(self, z):
        h = rnn_output(self.state, z, EXP_MODE)
        #print('h', h.shape, h)
        '''
    action = np.dot(h, self.weight) + self.bias
    action[0] = np.tanh(action[0])
    action[1] = sigmoid(action[1])
    action[2] = clip(np.tanh(action[2]))
    '''
        if EXP_MODE == MODE_Z_HIDDEN:  # one hidden layer
            h = np.tanh(np.dot(h, self.weight_hidden) + self.bias_hidden)
            action = np.tanh(np.dot(h, self.weight_output) + self.bias_output)
        else:
            '''print(h.shape)
      print(self.weight.shape)
      print(self.bias.shape)'''
            action = np.tanh(np.dot(h, self.weight) + self.bias)
        '''for i in range(ACTION_SIZE):
      action[i] = (action[i]+1.0) / 2.0 #all actions value are in range 0 to 1'''
        #action[2] = clip(action[2])
        self.state = rnn_next_state(self.rnn, z, action,
                                    self.state)  # advance the MDN-RNN hidden state
        return action

    def set_model_params(self, model_params):
        if EXP_MODE == MODE_Z_HIDDEN:  # one hidden layer
            params = np.array(model_params)
            cut_off = (self.input_size + 1) * self.hidden_size
            params_1 = params[:cut_off]
            params_2 = params[cut_off:]
            self.bias_hidden = params_1[:self.hidden_size]
            self.weight_hidden = params_1[self.hidden_size:].reshape(
                self.input_size, self.hidden_size)
            self.bias_output = params_2[:ACTION_SIZE]
            self.weight_output = params_2[ACTION_SIZE:].reshape(
                self.hidden_size, ACTION_SIZE)
        else:
            self.bias = np.array(model_params[:ACTION_SIZE])
            self.weight = np.array(model_params[ACTION_SIZE:]).reshape(
                self.input_size, ACTION_SIZE)

    def load_model(self, filename):
        with open(filename) as f:
            data = json.load(f)
        print('loading file %s' % (filename))
        self.data = data
        model_params = np.array(data[0])  # assuming other stuff is in data
        self.set_model_params(model_params)

    def get_random_model_params(self, stdev=0.1):
        #return np.random.randn(self.param_count)*stdev
        return np.random.standard_cauchy(
            self.param_count) * stdev  # spice things up

    def init_random_model_params(self, stdev=0.1):
        params = self.get_random_model_params(stdev=stdev)
        self.set_model_params(params)
        vae_params = self.vae.get_random_model_params(stdev=stdev)
        self.vae.set_model_params(vae_params)
        rnn_params = self.rnn.get_random_model_params(stdev=stdev)
        self.rnn.set_model_params(rnn_params)
Example #17
class Model:
    ''' simple one layer model for car racing '''
    def __init__(self, arglist, action_space, scope, load_model=True):
        self.action_space = action_space
        self.arglist = arglist
        self.vae = ConvVAE(batch_size=1,
                           gpu_mode=False,
                           is_training=False,
                           reuse=True)

        hps_sample = hps_model._replace(
            batch_size=1,
            input_seq_width=32 + arglist.action_space +
            (arglist.agent_num - 1) * arglist.action_space * arglist.timestep,
            max_seq_len=1,
            use_recurrent_dropout=0,
            is_training=0)

        self.rnn = MDNRNN(hps_sample, gpu_mode=False, reuse=True)

        if load_model:
            self.vae.load_json(arglist.vae_model_dir)
            self.rnn.load_json(arglist.rnn_model_dir)

        self.state = rnn_init_state(self.rnn)
        self.rnn_mode = True
        if arglist.inference:
            self.input_size = rnn_output_size(
                EXP_MODE) + (arglist.agent_num - 1) * arglist.action_space
        else:
            self.input_size = rnn_output_size(EXP_MODE)
        self.z_size = 32

        # action trajectory recording: one independent buffer per opponent
        # (list multiplication would alias a single deque across all of them)
        self.act_traj = [
            collections.deque(np.zeros(
                (arglist.timestep, arglist.action_space)),
                              maxlen=arglist.timestep)
            for _ in range(arglist.agent_num - 1)
        ]
        self.oppo_model = Oppo_Model(arglist.agent_num, arglist.timestep,
                                     arglist.action_space,
                                     arglist.action_space,
                                     "oppo_model_{}".format(scope))
        self.inference = arglist.inference

        if EXP_MODE == MODE_Z_HIDDEN:  # one hidden layer
            self.hidden_size = 40
            self.weight_hidden = np.random.randn(self.input_size,
                                                 self.hidden_size)
            self.bias_hidden = np.random.randn(self.hidden_size)
            self.weight_output = np.random.randn(self.hidden_size,
                                                 self.action_space)
            self.bias_output = np.random.randn(self.action_space)
            self.param_count = ((self.input_size + 1) * self.hidden_size) + (
                self.hidden_size * self.action_space + self.action_space)
        else:
            self.weight = np.random.randn(self.input_size, self.action_space)
            self.bias = np.random.randn(self.action_space)
            self.param_count = (
                self.input_size) * self.action_space + self.action_space

    def reset(self):
        self.state = rnn_init_state(self.rnn)
        # self.oppo_state = lstm_init_state(self.oppo_model)

    def encode_obs(self, obs):
        # convert raw obs to z, mu, logvar
        result = np.copy(obs).astype(np.float) / 255.0
        result = result.reshape(1, 64, 64, 3)
        mu, logvar = self.vae.encode_mu_logvar(result)
        mu = mu[0]
        logvar = logvar[0]
        s = logvar.shape
        z = mu + np.exp(logvar / 2.0) * np.random.randn(*s)
        return z, mu, logvar

    def get_action(self, z):
        h = rnn_output(self.state, z, EXP_MODE)

        if self.arglist.inference:
            oppo_intents = []
            for i in range(self.arglist.agent_num - 1):
                act_traj = self.act_traj[i]
                intent = self.oppo_model.get_inference(act_traj)
                oppo_intents.append(intent)
            oppo_intents = np.reshape(
                oppo_intents,
                ((self.arglist.agent_num - 1) * self.arglist.action_space))
            # legacy single-layer controller, kept as a comment:
            # action = np.dot(h, self.weight) + self.bias
            # action[0] = np.tanh(action[0])
            # action[1] = sigmoid(action[1])
            # action[2] = clip(np.tanh(action[2]))
            # oppo intent shape: (batch_size, agent_num, action_space);
            # flattened to ((agent_num - 1) * action_space,) before concatenation

            controller_input = np.concatenate((h, oppo_intents))
        else:
            controller_input = h

        if EXP_MODE == MODE_Z_HIDDEN:  # one hidden layer
            x = np.tanh(
                np.dot(controller_input, self.weight_hidden) +
                self.bias_hidden)
            action = np.tanh(np.dot(x, self.weight_output) + self.bias_output)
        else:
            action = np.tanh(np.dot(controller_input, self.weight) + self.bias)
        for i in range(self.action_space):
            action[i] = clip(action[i])

        self.state = rnn_next_state(self.rnn, z, action, self.act_traj,
                                    self.state)
        # self.oppo_state = oppo_next_state(self.oppo_model, action, self.act_traj, self.oppo_state)

        # epsilon exploration
        if np.random.uniform(0, 1) < 0.2:
            action = [np.random.uniform(-3, 3)] * len(action)
        return action

    def set_model_params(self, model_params):
        if EXP_MODE == MODE_Z_HIDDEN:  # one hidden layer
            params = np.array(model_params)
            cut_off = (self.input_size + 1) * self.hidden_size
            params_1 = params[:cut_off]
            params_2 = params[cut_off:]
            self.bias_hidden = params_1[:self.hidden_size]
            self.weight_hidden = params_1[self.hidden_size:].reshape(
                self.input_size, self.hidden_size)
            self.bias_output = params_2[:self.action_space]
            self.weight_output = params_2[self.action_space:].reshape(
                self.hidden_size, self.action_space)
        else:
            self.bias = np.array(model_params[:self.action_space])
            self.weight = np.array(model_params[self.action_space:]).reshape(
                self.input_size, self.action_space)

    def load_model(self, filename):
        with open(filename) as f:
            data = json.load(f)
        print('loading file %s' % (filename))
        self.data = data
        model_params = np.array(data[0])  # assuming other stuff is in data
        self.set_model_params(model_params)

    def get_random_model_params(self, stdev=0.1):
        #return np.random.randn(self.param_count)*stdev
        return np.random.standard_cauchy(
            self.param_count) * stdev  # spice things up

    def init_random_model_params(self, stdev=0.1):
        params = self.get_random_model_params(stdev=stdev)
        self.set_model_params(params)
        vae_params = self.vae.get_random_model_params(stdev=stdev)
        self.vae.set_model_params(vae_params)
        rnn_params = self.rnn.get_random_model_params(stdev=stdev)
        self.rnn.set_model_params(rnn_params)
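
When opponent inference is enabled, the controller input is the RNN feature vector concatenated with one predicted intent per opponent, which is why input_size grows by (agent_num - 1) * action_space in __init__. A small sketch of that flattening and concatenation; all sizes below are chosen purely for illustration:

import numpy as np

rnn_out = 32 + 256   # assumed rnn_output_size(EXP_MODE)
agent_num = 3        # assumed number of agents
action_space = 5     # assumed per-agent action dimension

h = np.random.randn(rnn_out)                    # features for the controlled agent
oppo_intents = [np.random.randn(action_space)   # one predicted intent per opponent
                for _ in range(agent_num - 1)]

# same reshape/concatenate that get_action performs before the controller
oppo_intents = np.reshape(oppo_intents, ((agent_num - 1) * action_space))
controller_input = np.concatenate((h, oppo_intents))

assert controller_input.shape == (rnn_out + (agent_num - 1) * action_space,)
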
예제 #18
0
class Model:
    ''' simple one layer model for translating game state to actions'''
    def __init__(self, load_model=True):
        self.env_name = "Pong"
        self._make_env()

        self.vae = ConvVAE(batch_size=1,
                           gpu_mode=False,
                           is_training=False,
                           reuse=True)

        hps_sample_dynamic = hps_sample._replace(num_actions=self.num_actions)
        self.rnn = MDNRNN(hps_sample_dynamic, gpu_mode=False, reuse=True)

        if load_model:
            self.vae.load_json('vae/vae.json')
            self.rnn.load_json('rnn/rnn.json')

        self.state = rnn_init_state(self.rnn)
        self.rnn_mode = True

        self.input_size = rnn_output_size(EXP_MODE)
        self.z_size = 32

        if EXP_MODE == MODE_Z_HIDDEN:  # one hidden layer
            raise Exception("not ported for atari")
            self.hidden_size = 40
            self.weight_hidden = np.random.randn(self.input_size,
                                                 self.hidden_size)
            self.bias_hidden = np.random.randn(self.hidden_size)
            self.weight_output = np.random.randn(self.hidden_size,
                                                 self.num_actions)
            self.bias_output = np.random.randn(self.num_actions)
            self.param_count = ((self.input_size + 1) * self.hidden_size) + (
                (self.hidden_size + 1) * self.num_actions)
        else:
            # TODO: Not known until env.action_space is queried...
            self.weight = np.random.randn(self.input_size, self.num_actions)
            self.bias = np.random.randn(self.num_actions)
            self.param_count = (self.input_size + 1) * self.num_actions

        self.render_mode = False

    def _make_env(self):
        self.render_mode = render_mode
        self.env = make_env(self.env_name)
        self.num_actions = self.env.action_space.n

    def make_env(self):
        pass  #TODO (Chazzz): eventually remove

    def reset(self):
        self.state = rnn_init_state(self.rnn)

    def encode_obs(self, obs):
        # convert raw obs to z, mu, logvar
        result = np.copy(obs).astype(np.float) / 255.0
        result = result.reshape(1, 64, 64, 1)
        mu, logvar = self.vae.encode_mu_logvar(result)
        mu = mu[0]
        logvar = logvar[0]
        s = logvar.shape
        z = mu + np.exp(logvar / 2.0) * np.random.randn(*s)
        return z, mu, logvar

    def get_action(self, z):
        h = rnn_output(self.state, z, EXP_MODE)
        # print(len(h), " h:", h) #TODO: 256+32 (the 32 comes first)
        # So we could have 288*2*18 params, or 288*2*environment.action_space.n (6 for Pong)
        # legacy continuous controller from the CarRacing agent (not used for Atari):
        # action = np.dot(h, self.weight) + self.bias
        # action[0] = np.tanh(action[0])
        # action[1] = sigmoid(action[1])
        # action[2] = clip(np.tanh(action[2]))
        if EXP_MODE == MODE_Z_HIDDEN:  # one hidden layer
            raise Exception("Not ported to atari")
            # h = np.tanh(np.dot(h, self.weight_hidden) + self.bias_hidden)
            # action = np.tanh(np.dot(h, self.weight_output) + self.bias_output)
        else:
            # we could sample stochastically from a softmax over the logits,
            # but here the action is chosen greedily
            action = np.argmax(np.matmul(h, self.weight) + self.bias)

        # action[1] = (action[1]+1.0) / 2.0
        # action[2] = clip(action[2])
        # print("Action:", action)
        action_one_hot = np.zeros(self.num_actions)
        action_one_hot[action] = 1
        # print("Action hot:", action_one_hot)

        self.state = rnn_next_state(self.rnn, z, action_one_hot, self.state)

        return action

    def set_model_params(self, model_params):
        if EXP_MODE == MODE_Z_HIDDEN:  # one hidden layer
            params = np.array(model_params)
            cut_off = (self.input_size + 1) * self.hidden_size
            params_1 = params[:cut_off]
            params_2 = params[cut_off:]
            self.bias_hidden = params_1[:self.hidden_size]
            self.weight_hidden = params_1[self.hidden_size:].reshape(
                self.input_size, self.hidden_size)
            self.bias_output = params_2[:self.num_actions]
            self.weight_output = params_2[self.num_actions:].reshape(
                self.hidden_size, self.num_actions)
        else:
            self.bias = np.array(model_params[:self.num_actions])
            self.weight = np.array(model_params[self.num_actions:]).reshape(
                self.input_size, self.num_actions)

    def load_model(self, filename):
        with open(filename) as f:
            data = json.load(f)
        print('loading file %s' % (filename))
        self.data = data
        model_params = np.array(data[0])  # assuming other stuff is in data
        self.set_model_params(model_params)

    def get_random_model_params(self, stdev=0.1):
        #return np.random.randn(self.param_count)*stdev
        return np.random.standard_cauchy(
            self.param_count) * stdev  # spice things up

    def init_random_model_params(self, stdev=0.1):
        params = self.get_random_model_params(stdev=stdev)
        self.set_model_params(params)
        vae_params = self.vae.get_random_model_params(stdev=stdev)
        self.vae.set_model_params(vae_params)
        rnn_params = self.rnn.get_random_model_params(stdev=stdev)
        self.rnn.set_model_params(rnn_params)
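
The Pong variant picks its discrete action greedily with argmax; the in-code comment notes that one could instead sample from a softmax over the logits. A hedged sketch of that stochastic alternative (sample_action and the sizes used below are illustrations, not part of the repository):

import numpy as np

def sample_action(h, weight, bias, temperature=1.0, rng=np.random):
    # stochastic counterpart of np.argmax(np.matmul(h, weight) + bias)
    logits = (np.dot(h, weight) + bias) / temperature
    logits -= logits.max()                          # numerical stability
    probs = np.exp(logits) / np.sum(np.exp(logits))
    return rng.choice(len(probs), p=probs)

# usage with made-up sizes: 288 features, 6 Pong actions
h = np.random.randn(288)
weight = np.random.randn(288, 6) * 0.1
bias = np.zeros(6)
action = sample_action(h, weight, bias)
action_one_hot = np.zeros(6)
action_one_hot[action] = 1.0   # the MDN-RNN is fed the one-hot action, as above
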
예제 #19
0
class Model:
    ''' simple one layer model for car racing '''
    def __init__(self, load_model=True):
        self.env_name = 'Carracing'
        self.vae = ConvVAE(batch_size=1,
                           gpu_mode=False,
                           is_training=False,
                           reuse=True)

        self.rnn = MDNRNN(hps_sample, gpu_mode=False, reuse=True)
        if load_model:
            self.vae.load_json('vae/vae.json')
            self.rnn.load_json('rnn/rnn.json')

        self.state = rnn_init_state(self.rnn)
        self.rnn_mode = True

        self.input_size = rnn_output_size(EXP_MODE)
        self.z_size = 16

        if EXP_MODE == MODE_Z_HIDDEN:  # one hidden layer
            self.hidden_size = 40
            self.weight_hidden = np.random.randn(self.input_size,
                                                 self.hidden_size)
            self.bias_hidden = np.random.randn(self.hidden_size)
            self.weight_output = np.random.randn(self.hidden_size, 3)
            self.bias_output = np.random.randn(3)
            self.param_count = ((self.input_size + 1) *
                                self.hidden_size) + (self.hidden_size * 3 + 3)
        else:
            self.weight = np.random.randn(self.input_size, 3)
            self.bias = np.random.randn(3)
            self.param_count = (self.input_size) * 3 + 3

        self.render_mode = False

    def make_env(self, client, seed=-1, render_mode=False, full_episode=False):
        self.client = client
        self.render_mode = render_mode
        self.env = TorcsEnv(
            vision=False, throttle=True, gear_change=False
        )  #make_env(self.env_name, seed=seed, render_mode=render_mode, full_episode=full_episode)

    def reset(self):
        self.state = rnn_init_state(self.rnn)

    def encode_obs(self, obs):
        dictlist = []
        for key, value in obs.items():
            if key == 'opponents' or key == 'track' or key == 'wheelSpinVel' or key == 'focus':
                dictlist = dictlist + value
            else:
                dictlist.append(value)
        obs = dictlist
        result = np.copy(obs).astype(np.float) / 255.0
        result = result.reshape(1, 79, 1)
        mu, logvar = self.vae.encode_mu_logvar(result)
        mu = mu[0]
        logvar = logvar[0]
        s = logvar.shape
        z = mu + np.exp(logvar / 2.0) * np.random.randn(*s)
        return z, mu, logvar

    def get_action(self, z):
        h = rnn_output(self.state, z, EXP_MODE)
        # legacy controller formulation, kept as a comment:
        # action = np.dot(h, self.weight) + self.bias
        # action[0] = np.tanh(action[0])
        # action[1] = sigmoid(action[1])
        # action[2] = clip(np.tanh(action[2]))
        if EXP_MODE == MODE_Z_HIDDEN:  # one hidden layer
            h = np.tanh(np.dot(h, self.weight_hidden) + self.bias_hidden)
            action = np.tanh(np.dot(h, self.weight_output) + self.bias_output)
        else:
            action = np.tanh(np.dot(h, self.weight) + self.bias)

        action[0] = action[0]  # steering stays in [-1, 1]
        action[1] = (action[1] + 1.0) / 2.0  # rescale from [-1, 1] to [0, 1]
        action[2] = clip(action[2])

        self.state = rnn_next_state(self.rnn, z, action, self.state)

        return action

    def set_model_params(self, model_params):
        if EXP_MODE == MODE_Z_HIDDEN:  # one hidden layer
            params = np.array(model_params)
            cut_off = (self.input_size + 1) * self.hidden_size
            params_1 = params[:cut_off]
            params_2 = params[cut_off:]
            self.bias_hidden = params_1[:self.hidden_size]
            self.weight_hidden = params_1[self.hidden_size:].reshape(
                self.input_size, self.hidden_size)
            self.bias_output = params_2[:3]
            self.weight_output = params_2[3:].reshape(self.hidden_size, 3)
        else:
            self.bias = np.array(model_params[:3])
            self.weight = np.array(model_params[3:]).reshape(
                self.input_size, 3)

    def load_model(self, filename):
        with open(filename) as f:
            data = json.load(f)
        print('loading file %s' % (filename))
        self.data = data
        model_params = np.array(data[0])  # assuming other stuff is in data
        self.set_model_params(model_params)

    def get_random_model_params(self, stdev=0.1):
        #return np.random.randn(self.param_count)*stdev
        return np.random.standard_cauchy(
            self.param_count) * stdev  # spice things up

    def init_random_model_params(self, stdev=0.1):
        params = self.get_random_model_params(stdev=stdev)
        self.set_model_params(params)
        vae_params = self.vae.get_random_model_params(stdev=stdev)
        self.vae.set_model_params(vae_params)
        rnn_params = self.rnn.get_random_model_params(stdev=stdev)
        self.rnn.set_model_params(rnn_params)
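
This TORCS variant relies on clip (and the commented-out sigmoid) without defining them in the excerpt. The sketch below shows plausible one-line definitions plus the post-processing the controller applies; treat the helper bodies as assumptions rather than the repository's exact code.

import numpy as np

def clip(x, lo=-1.0, hi=1.0):
    # assumed helper: clamp a scalar (or array) into [lo, hi]
    return np.minimum(np.maximum(x, lo), hi)

def sigmoid(x):
    # assumed helper: squash into (0, 1)
    return 1.0 / (1.0 + np.exp(-x))

# post-processing as in get_action above
action = np.tanh(np.random.randn(3))   # stand-in controller output
action[1] = (action[1] + 1.0) / 2.0    # second component rescaled to [0, 1]
action[2] = clip(action[2])            # third component clipped to [-1, 1]
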
예제 #20
0
filelist.sort()
filelist = filelist[0:1000]

dataset, action_dataset = load_raw_data_list(filelist)

reset_graph()

vae = ConvVAE(z_size=z_size,
              batch_size=batch_size,
              learning_rate=learning_rate,
              kl_tolerance=kl_tolerance,
              is_training=False,
              reuse=False,
              gpu_mode=True)  # use GPU on batchsize of 1000 -> much faster

vae.load_json(os.path.join(model_path_name, 'vae.json'))

mu_dataset = []
logvar_dataset = []
for i in range(len(dataset)):
    data_batch = dataset[i]
    mu, logvar, z = encode_batch(data_batch)
    mu_dataset.append(mu.astype(np.float16))
    logvar_dataset.append(logvar.astype(np.float16))
    if ((i + 1) % 100 == 0):
        print(i + 1)

action_dataset = np.array(action_dataset)
mu_dataset = np.array(mu_dataset)
logvar_dataset = np.array(logvar_dataset)
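
encode_batch is not shown in this excerpt; judging from the Model classes above, it most likely pushes a batch of frames through vae.encode_mu_logvar and samples z with the reparameterisation trick. A hedged sketch of such a helper, assuming data_batch is a uint8 array of shape (N, 64, 64, 3) and vae is the ConvVAE constructed above:

import numpy as np

def encode_batch(data_batch):
    # sketch only: normalise, encode with the ConvVAE, sample z
    batch = data_batch.astype(np.float32) / 255.0
    mu, logvar = vae.encode_mu_logvar(batch)
    z = mu + np.exp(logvar / 2.0) * np.random.randn(*logvar.shape)
    return mu, logvar, z
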
예제 #21
0
from baselines.ddpg.memory import Memory
from baselines.ddpg.noise import AdaptiveParamNoiseSpec, NormalActionNoise, OrnsteinUhlenbeckActionNoise
from baselines.common import set_global_seeds
import baselines.common.tf_util as U

from baselines import logger
import numpy as np

try:
    from mpi4py import MPI
except ImportError:
    MPI = None

vae = ConvVAE(batch_size=1, gpu_mode=False, is_training=False, reuse=True)
rnn = MDNRNN(hps_sample, gpu_mode=False, reuse=True)
vae.load_json('vae/vae.json')
rnn.load_json('rnn/rnn.json')


def learn(network, env,
          seed=None,
          total_timesteps=None,
          nb_epochs=None, # with default settings, perform 1M steps total
          nb_epoch_cycles=20,
          nb_rollout_steps=100,
          reward_scale=1.0,
          render=False,
          render_eval=False,
          noise_type='adaptive-param_0.2',
          normalize_returns=False,
          normalize_observations=True,
예제 #22
0
    dataset, action_dataset, oppo_action_dataset = load_raw_data_list(
        filelist, arglist)

    reset_graph()
    if arglist.use_vae:
        vae = ConvVAE(
            z_size=arglist.z_size,
            batch_size=arglist.batch_size,
            learning_rate=arglist.lr,
            kl_tolerance=arglist.kl_tolerance,
            is_training=False,
            reuse=False,
            gpu_mode=True)  # use GPU on batchsize of 1000 -> much faster

        vae.load_json(os.path.join(arglist.vae_path, 'vae.json'))

    mu_dataset = []
    logvar_dataset = []
    action_dataset_real = []
    oppo_action_dataset_real = []
    for i in range(len(dataset)):
        data_batch = dataset[i]
        if len(data_batch) <= arglist.batch_size:
            continue
        else:
            data_batch = data_batch[:arglist.batch_size]
        if arglist.use_vae:
            mu, logvar, z = encode_batch(data_batch, arglist)
            mu_dataset.append(mu.astype(np.float16))
            logvar_dataset.append(logvar.astype(np.float16))
예제 #23
0
class Model:
    ''' simple one layer model for car racing '''
    def __init__(self, arglist):
        self.env_name = arglist.game
        self.vae = ConvVAE(batch_size=1,
                           gpu_mode=False,
                           is_training=False,
                           reuse=True)
        self.vae.load_json(arglist.vae_file)
        self.rnn = MDNRNN(hps_sample, gpu_mode=False, reuse=True)
        self.rnn.load_json(arglist.rnn_file)
        self.state = rnn_init_state(self.rnn)
        self.rnn_mode = True

        self.input_size = rnn_output_size(EXP_MODE)
        self.z_size = 32

        if EXP_MODE == MODE_Z_HIDDEN:  # one hidden layer
            self.hidden_size = 40
            self.weight_hidden = np.random.randn(self.input_size,
                                                 self.hidden_size)
            self.bias_hidden = np.random.randn(self.hidden_size)
            self.weight_output = np.random.randn(self.hidden_size, 2)
            self.bias_output = np.random.randn(2)
            self.param_count = ((self.input_size + 1) *
                                self.hidden_size) + (self.hidden_size * 2 + 2)
        else:
            self.weight = np.random.randn(self.input_size, 2)
            self.bias = np.random.randn(2)
            self.param_count = (self.input_size) * 2 + 2

        self.render_mode = False

    def make_env(self, seed=-1, render_mode=False):
        self.render_mode = render_mode
        self.env = make_env(self.env_name, seed=seed, render_mode=render_mode)

    def reset(self):
        self.state = rnn_init_state(self.rnn)

    def encode_obs(self, obs):
        # convert raw obs to z, mu, logvar
        result = np.copy(obs).astype(np.float) / 255.0
        result = result.reshape(1, 64, 64, 3)
        mu, logvar = self.vae.encode_mu_logvar(result)
        mu = mu[0]
        logvar = logvar[0]
        s = logvar.shape
        z = mu + np.exp(logvar / 2.0) * np.random.randn(*s)
        return z, mu, logvar

    def decode_obs(self, z):
        # decode the latent vector
        img = self.vae.decode(z.reshape(1, self.z_size)) * 255.
        img = np.round(img).astype(np.uint8)
        img = img.reshape(64, 64, 3)
        return img

    def get_action(self, z, arglist):
        h = rnn_output(self.state, z, EXP_MODE)
        # legacy controller formulation, kept as a comment:
        # action = np.dot(h, self.weight) + self.bias
        # action[0] = np.tanh(action[0])
        # action[1] = sigmoid(action[1])
        # action[2] = clip(np.tanh(action[2]))
        if EXP_MODE == MODE_Z_HIDDEN:  # one hidden layer
            h = np.tanh(np.dot(h, self.weight_hidden) + self.bias_hidden)
            action = np.tanh(np.dot(h, self.weight_output) + self.bias_output)
        else:
            action = np.tanh(np.dot(h, self.weight) + self.bias)

        if arglist.competitive:
            obs, rewards, done, win = self.env.step([action[0], 'script'])
        else:
            obs, rewards, done, win = self.env.step(action)

        extra_reward = 0.0  # penalize for turning too frequently
        if arglist.competitive:
            if arglist.train_mode and penalize_turning:
                extra_reward -= np.abs(action[0]) / 10.0
                rewards[0] += extra_reward
            reward = rewards[0]
        else:
            if arglist.train_mode and penalize_turning:
                reward = np.sum(rewards)
                extra_reward -= np.abs(action[0]) / 10.0
                reward += extra_reward

        # recording_reward.append(reward)
        # total_reward += reward

        self.state = rnn_next_state(self.rnn, z, action, self.state)

        return action

    def set_model_params(self, model_params):
        if EXP_MODE == MODE_Z_HIDDEN:  # one hidden layer
            params = np.array(model_params)
            cut_off = (self.input_size + 1) * self.hidden_size
            params_1 = params[:cut_off]
            params_2 = params[cut_off:]
            self.bias_hidden = params_1[:self.hidden_size]
            self.weight_hidden = params_1[self.hidden_size:].reshape(
                self.input_size, self.hidden_size)
            self.bias_output = params_2[:2]
            self.weight_output = params_2[2:].reshape(self.hidden_size, 2)
        else:
            self.bias = np.array(model_params[:2])
            self.weight = np.array(model_params[2:]).reshape(
                self.input_size, 2)

    def load_model(self, filename):
        with open(filename) as f:
            data = json.load(f)
        print('loading file %s' % (filename))
        self.data = data
        model_params = np.array(data[0])  # assuming other stuff is in data
        self.set_model_params(model_params)

    def get_random_model_params(self, stdev=0.1):
        return np.random.randn(self.param_count) * stdev
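
Since this variant also exposes decode_obs, it is easy to inspect what the 32-dim latent actually preserves. A small round-trip sketch, assuming model is an instance of the Model above and frame is a 64x64x3 uint8 observation:

import numpy as np

frame = np.random.randint(0, 256, size=(64, 64, 3), dtype=np.uint8)  # stand-in observation
z, mu, logvar = model.encode_obs(frame)   # sample a 32-dim latent
reconstruction = model.decode_obs(z)      # decode back to a 64x64x3 uint8 image
assert reconstruction.shape == (64, 64, 3)
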
예제 #24
0
class VAERacing(CarRacing):
  def __init__(self, full_episode=False):
    super(VAERacing, self).__init__()
    self._internal_counter = 0
    self.z_size = games['vae_racing'].input_size

    #print("vae_racing.py z", self.z_size)

    self.vae = ConvVAE(batch_size=1, z_size=self.z_size, gpu_mode=True, is_training=False, reuse=True)
    #print("vae_racing.py vae", self.vae)
    
    self.vae.load_json('vae/vae_'+str(self.z_size)+'.json')
    self.full_episode = full_episode
    high = np.array([np.inf] * self.z_size)
    self.observation_space = Box(-high, high)
    self._has_rendered = False
    self.real_frame = None

  def reset(self):
    self._internal_counter = 0
    self._has_rendered = False
    self.real_frame = None
    
    obs = super(VAERacing, self).reset()
    
    result = np.copy(_process_frame(obs)).astype(np.float)/255.0
    result = result.reshape(1, 64, 64, 3)
    self.real_frame = result

    mu, logvar = self.vae.encode_mu_logvar(result)
    mu = mu[0]
    logvar = logvar[0]
    s = logvar.shape
    z = mu + np.exp(logvar/2.0) * np.random.randn(*s)

    if MU_MODE:
      return mu
    return z

  def render(self, mode='human', close=False):
    if mode == 'human' or mode == 'rgb_array':
      self._has_rendered = True
    return super(VAERacing, self).render(mode=mode)

  def step(self, action):
    #print("action", action)
    if not self._has_rendered:
      self.render("rgb_array")
      self._has_rendered = False

    if action is not None:
      action[0] = _clip(action[0], lo=-1.0, hi=+1.0)
      action[1] = _clip(action[1], lo=-1.0, hi=+1.0)
      action[1] = (action[1]+1.0) / 2.0
      action[2] = _clip(action[2])

    obs, reward, done, _ = super(VAERacing, self).step(action)

    result = np.copy(_process_frame(obs)).astype(np.float)/255.0
    result = result.reshape(1, 64, 64, 3)
    self.real_frame = result

    #z = self.vae.encode(result).flatten()
    mu, logvar = self.vae.encode_mu_logvar(result)
    mu = mu[0]
    logvar = logvar[0]
    s = logvar.shape
    z = mu + np.exp(logvar/2.0) * np.random.randn(*s)

    if self.full_episode:
      if MU_MODE:
        return mu, reward, False, {}
      else:
        return z, reward, False, {}

    self._internal_counter += 1
    if self._internal_counter > TIME_LIMIT:
      done = True

    if MU_MODE:
      #print("mu", mu)
      return mu, reward, done, {}
    return z, reward, done, {}
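
VAERacing behaves like an ordinary Gym environment whose observations are already latent vectors, so a random-policy rollout is enough to collect z sequences (e.g. for MDN-RNN training). A minimal sketch, assuming the class above has been constructed with default settings:

import numpy as np

env = VAERacing(full_episode=False)
z = env.reset()                 # latent observation (or mu if MU_MODE is set)
latents, done = [z], False
while not done:
    action = np.random.uniform(-1.0, 1.0, size=3)  # steering / throttle / brake
    # step() clips and rescales the action internally before calling CarRacing
    z, reward, done, _ = env.step(action)
    latents.append(z)
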
예제 #25
0
class Model:
    def __init__(self, load_model=True):
        self.env_name = "carracing"
        self.vae = ConvVAE(batch_size=1, gpu_mode=False, is_training=False, reuse=True)
        self.rnn = MDNRNN(hps_sample, gpu_mode=False, reuse=True)
        if load_model:
            self.vae.load_json('vae/vae.json')
            self.rnn.load_json('rnn/rnn.json')
        self.state = rnn_init_state(self.rnn)
        self.rnn_mode = True
        self.input_size = rnn_output_size(EXP_MODE)
        self.z_size = 32
        if EXP_MODE == MODE_Z_HIDDEN:
            self.hidden_size = 40
            self.weight_hidden = np.random.randn(self.input_size, self.hidden_size)
            self.bias_hidden = np.random.randn(self.hidden_size)
            self.weight_output = np.random.randn(self.hidden_size, 3)
            self.bias_output = np.random.randn(3)
            self.param_count = ((self.input_size + 1) * self.hidden_size) + (self.hidden_size * 3 + 3)
        else:
            self.weight = np.random.randn(self.input_size, 3)
            self.bias = np.random.randn(3)
            self.param_count = (self.input_size) * 3 + 3
        self.render_mode = False

    def make_env(self, seed=-1, render_mode=False, full_episode=False):
        self.render_mode = render_mode
        self.env = make_env(self.env_name, seed=seed, render_mode=render_mode, full_episode=full_episode)

    def reset(self):
        self.state = rnn_init_state(self.rnn)

    def encode_obs(self, obs):
        result = np.copy(obs).astype(np.float) / 255.0
        result = result.reshape(1, 64, 64, 3)
        mu, logvar = self.vae.encode_mu_logvar(result)
        mu = mu[0]
        logvar = logvar[0]
        s = logvar.shape
        z = mu + np.exp(logvar / 2.0) * np.random.randn(*s)
        return z, mu, logvar

    def get_action(self, z):
        h = rnn_output(self.state, z, EXP_MODE)
        if EXP_MODE == MODE_Z_HIDDEN:
            h = np.tanh(np.dot(h, self.weight_hidden) + self.bias_hidden)
            action = np.tanh(np.dot(h, self.weight_output) + self.bias_output)
        else:
            action = np.tanh(np.dot(h, self.weight) + self.bias)
        action[1] = (action[1] + 1.0) / 2.0
        action[2] = clip(action[2])
        self.state = rnn_next_state(self.rnn, z, action, self.state)
        return action

    def set_model_params(self, model_params):
        if EXP_MODE == MODE_Z_HIDDEN:
            params = np.array(model_params)
            cut_off = (self.input_size + 1) * self.hidden_size
            params_1 = params[:cut_off]
            params_2 = params[cut_off:]
            self.bias_hidden = params_1[:self.hidden_size]
            self.weight_hidden = params_1[self.hidden_size:].reshape(self.input_size, self.hidden_size)
            self.bias_output = params_2[:3]
            self.weight_output = params_2[3:].reshape(self.hidden_size, 3)
        else:
            self.bias = np.array(model_params[:3])
            self.weight = np.array(model_params[3:]).reshape(self.input_size, 3)

    def load_model(self, filename):
        with open(filename) as f:
            data = json.load(f)
        print('loading file %s' % (filename))
        self.data = data
        model_params = np.array(data[0])
        self.set_model_params(model_params)

    def get_random_model_params(self, stdev=0.1):
        return np.random.standard_cauchy(self.param_count) * stdev

    def init_random_model_params(self, stdev=0.1):
        params = self.get_random_model_params(stdev=stdev)
        self.set_model_params(params)
        vae_params = self.vae.get_random_model_params(stdev=stdev)
        self.vae.set_model_params(vae_params)
        rnn_params = self.rnn.get_random_model_params(stdev=stdev)
        self.rnn.set_model_params(rnn_params)