def reward_prediction_mid(input_images):
    """Build a reward-feature head on top of several intermediate images.

    Each element of `input_images` is encoded independently by three 3x3
    convolutions (16, 8 and 4 filters with strides 1, 2 and 2), flattened and
    projected to 8 units by a dense layer named "rew_enc_<index>". The
    encodings are stacked along axis 1, flattened, and passed through two
    ReLU dense layers ("rew_dense1", 32 units; "rew_dense2", 16 units).

    Args:
      input_images: iterable of image tensors (the original documentation
        says their spatial size is arbitrary).

    Returns:
      The output tensor of the 16-unit "rew_dense2" layer.
    """

    def _encode(index, image):
        # (filters, stride) of the three stacked convolutions.
        hidden = image
        for filters, stride in ((16, 1), (8, 2), (4, 2)):
            hidden = tfl.conv2d(hidden, filters, [3, 3],
                                strides=(stride, stride),
                                activation=tf.nn.relu)
        hidden = tfl.flatten(hidden)
        return tfl.dense(hidden, 8, activation=tf.nn.relu,
                         name="rew_enc_%d" % index)

    encodings = [_encode(i, image) for i, image in enumerate(input_images)]

    merged = tfl.flatten(tf.stack(encodings, axis=1))
    merged = tfl.dense(merged, 32, activation=tf.nn.relu, name="rew_dense1")
    return tfl.dense(merged, 16, activation=tf.nn.relu, name="rew_dense2")
def make_model(self):
    """Build the TF1 graph, session and saver described by `self.build_model`.

    `self.build_model` is read as a sequence of layer dicts keyed by 'type'
    (plus 'shape', 'nodes', 'name' depending on the type). When the first
    entry is an input layer and the last an output layer, their 'shape' and
    'length' entries are overwritten from `self.state_size` and
    `self.action_size`. The resulting tensors are stored in
    `self.model_layers`; the loss is the mean squared error between the
    `self.tf_qsa` placeholder and the last layer, minimised with Adam.

    Raises:
      IncoherentBuildModelError: an input layer appears after position 0.
      UnsupportedBuildModelLayerTypeError: a layer has an unknown type.
    """
    # NOTE(review): the original comment claimed these two calls were being
    # commented out, but they were left active; they are kept as-is.
    ops.reset_default_graph()
    tf.compat.v1.disable_eager_execution()

    # Initializing TensorFlow session
    self.sess = Session(config=ConfigProto(allow_soft_placement=True))

    if (self.build_model[0]['type'] == ModelBuilder.LAYER_INPUT
            and self.build_model[-1]['type'] == ModelBuilder.LAYER_OUTPUT):
        self.build_model[0]['shape'] = [None, self.state_size]
        self.build_model[-1]['length'] = self.action_size

    # Load each layer
    self.model_layers = []
    # enumerate() gives the true position; list.index() compares dicts by
    # value, so a duplicated input-layer spec used to be accepted as "first".
    for position, layer_model in enumerate(self.build_model):
        layer_type = layer_model['type']
        if layer_type == ModelBuilder.LAYER_INPUT:
            if position != 0:
                raise IncoherentBuildModelError(
                    "Input Layer must be the first one.")
            self.model_layers.append(
                placeholder(dtype=tf.float32,
                            shape=layer_model['shape'],
                            name='inputs_'))
        elif layer_type == ModelBuilder.LAYER_FULLY_CONNECTED:
            self.model_layers.append(
                layers.dense(inputs=self.model_layers[-1],
                             units=layer_model['nodes'],
                             activation=tf.nn.relu,
                             name=layer_model['name']))
        elif layer_type == ModelBuilder.LAYER_OUTPUT:
            self.model_layers.append(
                layers.dense(inputs=self.model_layers[-1],
                             units=self.action_size,
                             activation=None))
        else:
            # str() so a non-string type id still yields the intended error
            # instead of a TypeError from the concatenation.
            raise UnsupportedBuildModelLayerTypeError(
                "Unsuported Layer Type " + str(layer_type))

    # Setup output qsa layer and loss
    self.tf_qsa = placeholder(shape=[None, self.action_size],
                              dtype=tf.float32)
    self.loss = tf.losses.mean_squared_error(self.tf_qsa,
                                             self.model_layers[-1])
    self.optimizer = train.AdamOptimizer(self.learning_rate).minimize(
        self.loss)

    self.sess.run(global_variables_initializer())
    self.saver = train.Saver()
def __call__(self, x, reuse=True):
    """Apply the convolutional encoder to `x` inside scope `self.name`.

    `x` is reshaped to [-1, 56, 56, 3], run through three 'same'-padded
    convolutions named 'h1', 'h2', 'h3' (with `self.nfilt`, `2 * self.nfilt`
    and `4 * self.nfilt` filters, kernel `self.k`, stride `self.s`), each
    followed by a leaky ReLU, then flattened and projected by a dense layer.

    Args:
      x: input tensor reshapeable to [-1, 56, 56, 3].
      reuse: when truthy, mark the variable scope as reusing variables.

    Returns:
      Dense output with `self.num_at - 1` units and activation `self.act_at`.
    """
    with tf.variable_scope(self.name) as scope:
        if reuse:
            scope.reuse_variables()

        net = tf.reshape(x, shape=[-1, 56, 56, 3])

        # (layer name, multiplier applied to the base filter count).
        # Batch normalisation after each stage is disabled in this model.
        for layer_name, multiplier in (('h1', 1), ('h2', 2), ('h3', 4)):
            net = layers.conv2d(
                net,
                filters=self.nfilt * multiplier,
                kernel_initializer=tf.keras.initializers.glorot_normal(),
                kernel_size=self.k,
                padding='same',
                strides=[self.s, self.s],
                activation=None,
                name=layer_name)
            net = tf.nn.leaky_relu(net)

        net = layers.flatten(net)
        return layers.dense(
            net,
            self.num_at - 1,
            activation=self.act_at,
            kernel_initializer=tf.keras.initializers.glorot_normal())
def __call__(self, z, reuse=True):
    """Apply the transposed-convolution decoder to `z` in scope `self.name`.

    `z` goes through a `GaussianNoise(self.noise_z_std)` layer and a dense
    layer of `7 * 7 * self.nfilt * 4` units, is reshaped to
    [-1, 7, 7, self.nfilt * 4], and is then passed through three 'same'-padded
    transposed convolutions ('d1', 'd2', 'recon') with kernel `self.k` and
    stride `self.s`. The first two are followed by a leaky ReLU; 'recon'
    produces 3 channels and is followed by `self.act_out` when that is set.

    Args:
      z: latent input tensor.
      reuse: when truthy, mark the variable scope as reusing variables.

    Returns:
      The reconstruction reshaped to [-1, self.x_dim].
    """
    with tf.variable_scope(self.name) as vs:
        if reuse:
            vs.reuse_variables()

        fc = tf.keras.layers.GaussianNoise(self.noise_z_std)(z)
        fc = layers.dense(
            fc,
            7 * 7 * self.nfilt * 4,
            activation=self.act_at,
            kernel_initializer=tf.keras.initializers.glorot_normal())
        fc = tf.reshape(fc, [-1, 7, 7, self.nfilt * 4])

        # Upsampling stages; batch normalisation is disabled in this model.
        for layer_name, n_filters in (('d1', self.nfilt * 2),
                                      ('d2', self.nfilt)):
            fc = tf.layers.conv2d_transpose(
                fc,
                filters=n_filters,
                kernel_initializer=tf.keras.initializers.glorot_normal(),
                kernel_size=self.k,
                padding='same',
                strides=[self.s, self.s],
                activation=None,
                name=layer_name)
            fc = tf.nn.leaky_relu(fc)

        fc = tf.layers.conv2d_transpose(
            fc,
            filters=3,
            kernel_initializer=tf.keras.initializers.glorot_normal(),
            kernel_size=self.k,
            padding='same',
            strides=[self.s, self.s],
            activation=None,
            name='recon')

        # Identity test: `!= None` would dispatch to the object's __ne__.
        if self.act_out is not None:
            fc = self.act_out(fc)

        fc = tf.reshape(fc, shape=[-1, self.x_dim])
        return fc
def decode_to_shape(inputs, shape, scope):
    """Project `inputs` to `shape[2]` features and add a length-1 axis.

    The input is flattened, passed through a linear dense layer named
    "dec_dense" with `shape[2]` units, and expanded at axis 1. Variables are
    created under `scope` with `tf.AUTO_REUSE`.

    Args:
      inputs: tensor to decode.
      shape: indexable target shape; only `shape[2]` is read.
      scope: variable scope name.

    Returns:
      The dense output with an extra axis inserted at position 1.
    """
    with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
        flat = tfl.flatten(inputs)
        projected = tfl.dense(flat, shape[2], activation=None,
                              name="dec_dense")
        return tf.expand_dims(projected, axis=1)
def encode_to_shape(inputs, shape, scope):
    """Encode `inputs` into a single-channel map of size shape[1] x shape[2].

    The input is flattened, passed through a linear dense layer named
    "enc_dense" with `shape[1] * shape[2]` units, and reshaped to
    (-1, shape[1], shape[2], 1). Variables are created under `scope` with
    `tf.AUTO_REUSE`.

    Args:
      inputs: tensor to encode.
      shape: indexable target shape; `shape[1]` and `shape[2]` are read.
      scope: variable scope name.

    Returns:
      The reshaped dense output.
    """
    with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
        dim_a = shape[1]
        dim_b = shape[2]
        flat = tfl.flatten(inputs)
        encoded = tfl.dense(flat, dim_a * dim_b, activation=None,
                            name="enc_dense")
        return tf.reshape(encoded, (-1, dim_a, dim_b, 1))
def cartpole_model(x_input, num_actions, scope, reuse=False):
    """Small fully connected network used for CartPole.

    Two tanh dense layers of 32 units are followed by a linear dense layer
    with `num_actions` units. (This is the `layers.dense` port of an earlier
    `layers.fully_connected` version, migrated for TF 1.14.)

    Args:
      x_input: input tensor.
      num_actions: number of units in the output layer.
      scope: variable scope name.
      reuse: forwarded to `tf.variable_scope`.

    Returns:
      The output tensor of the final linear layer.
    """
    with tf.variable_scope(scope, reuse=reuse):
        hidden = x_input
        for _ in range(2):
            hidden = layers.dense(hidden, units=32, activation=tf.nn.tanh)
        return layers.dense(hidden, units=num_actions, activation=None)
def graph_conv(_X, _A, O):
    """Graph convolution layer: relu(_A @ dense(_X)).

    _X: node features.
    _A: adjacency matrix (edges/paths), left-multiplied onto the projection.
    O: number of output units of the dense projection (bias enabled).
    """
    projected = dense(_X, units=O, use_bias=True)
    aggregated = tf.matmul(_A, projected)
    return tf.nn.relu(aggregated)
def cdna_transformation(prev_image, cdna_input, num_masks, color_channels,
                        dna_kernel_size, relu_shift):
    """Apply convolutional dynamic neural advection to previous image.

    Args:
      prev_image: previous image to be transformed.
      cdna_input: hidden layer to be used for computing CDNA kernels.
      num_masks: number of masks and hence the number of CDNA transformations.
      color_channels: the number of color channels in the images.
      dna_kernel_size: dna kernel size.
      relu_shift: shift for ReLU function.

    Returns:
      List of images transformed by the predicted CDNA kernels.
    """
    batch_size = tf.shape(cdna_input)[0]
    height = int(prev_image.get_shape()[1])
    width = int(prev_image.get_shape()[2])

    # Predict kernels using linear function of last hidden layer.
    cdna_kerns = tfl.dense(cdna_input,
                           dna_kernel_size * dna_kernel_size * num_masks,
                           name="cdna_params",
                           activation=None)

    # Reshape and normalize.
    cdna_kerns = tf.reshape(
        cdna_kerns,
        [batch_size, dna_kernel_size, dna_kernel_size, 1, num_masks])
    # Clamp every kernel entry to at least relu_shift before normalising.
    cdna_kerns = (tf.nn.relu(cdna_kerns - relu_shift) + relu_shift)
    # `keepdims` is the current spelling of the deprecated `keep_dims`.
    norm_factor = tf.reduce_sum(cdna_kerns, [1, 2, 3], keepdims=True)
    cdna_kerns /= norm_factor

    # Treat the color channel dimension as the batch dimension since the same
    # transformation is applied to each color channel.
    # Treat the batch dimension as the channel dimension so that
    # depthwise_conv2d can apply a different transformation to each sample.
    cdna_kerns = tf.transpose(cdna_kerns, [1, 2, 0, 4, 3])
    cdna_kerns = tf.reshape(
        cdna_kerns,
        [dna_kernel_size, dna_kernel_size, batch_size, num_masks])
    # Swap the batch and channel dimensions.
    prev_image = tf.transpose(prev_image, [3, 1, 2, 0])

    # Transform image.
    transformed = tf.nn.depthwise_conv2d(prev_image, cdna_kerns,
                                         [1, 1, 1, 1], "SAME")

    # Transpose the dimensions to where they belong.
    transformed = tf.reshape(
        transformed, [color_channels, height, width, batch_size, num_masks])
    transformed = tf.transpose(transformed, [3, 1, 2, 0, 4])
    transformed = tf.unstack(transformed, axis=-1)
    return transformed
def reward_prediction_video_conv(frames, rewards, prediction_len):
    """Predict a reward vector from a list of frames and past rewards.

    Frames are concatenated on the last axis and encoded by four stride-2
    3x3 convolutions (32, 32, 16, 8 filters); rewards are concatenated on the
    last axis and encoded by three ReLU dense layers (32, 16, 8 units). Both
    encodings are concatenated and passed through dense layers of 32 and 16
    units and a linear layer of `prediction_len` units.

    Args:
      frames: the list of input images.
      rewards: previously observed rewards.
      prediction_len: the length of the reward vector.

    Returns:
      The predicted rewards with a trailing axis of size 1 appended.
    """
    image_features = tf.concat(frames, axis=-1)
    for filters in (32, 32, 16, 8):
        image_features = tfl.conv2d(image_features, filters, [3, 3],
                                    strides=(2, 2), activation=tf.nn.relu)
    image_features = tfl.flatten(image_features)

    reward_features = tf.concat(rewards, axis=-1)
    for units in (32, 16, 8):
        reward_features = tfl.dense(reward_features, units,
                                    activation=tf.nn.relu)

    joint = tf.concat([image_features, reward_features], axis=-1)
    for units in (32, 16):
        joint = tfl.dense(joint, units, activation=tf.nn.relu)
    joint = tfl.dense(joint, prediction_len, activation=None)
    return tf.expand_dims(joint, axis=-1)
def atari_model(img_in, num_actions, scope, reuse=False):
    """Convolutional Q-network: three conv layers and two dense layers.

    Under `scope`, a "convnet" sub-scope applies convolutions with
    (filters, kernel, stride) of (32, 8, 4), (64, 4, 2) and (64, 3, 1) and
    flattens the result; an "action_value" sub-scope applies a 512-unit ReLU
    dense layer and a linear layer with `num_actions` units.

    Args:
      img_in: input image tensor.
      num_actions: number of units in the output layer.
      scope: outer variable scope name.
      reuse: forwarded to the outer `tf.variable_scope`.

    Returns:
      The output tensor of the final linear layer.
    """
    # (filters, kernel_size, stride) for each convolution.
    conv_specs = ((32, 8, 4), (64, 4, 2), (64, 3, 1))

    with tf.variable_scope(scope, reuse=reuse):
        out = img_in
        with tf.variable_scope("convnet"):
            # NOTE(review): these prints emit the tf.shape op, not a concrete
            # shape; kept because they are part of the current output.
            for n_filters, kernel, stride in conv_specs:
                print(tf.shape(out))
                out = layers.conv2d(out,
                                    filters=n_filters,
                                    kernel_size=kernel,
                                    strides=(stride, stride),
                                    activation=tf.nn.relu)
            print(tf.shape(out))
            out = layers.flatten(out)
        with tf.variable_scope("action_value"):
            print(tf.shape(out))
            out = layers.dense(out, units=512, activation=tf.nn.relu)
            out = layers.dense(out, units=num_actions, activation=None)
        return out
def mlp_model(input, num_outputs, scope, reuse=False, num_units=64,
              rnn_cell=None):
    """Two-hidden-layer MLP mapping an observation to per-action values.

    Args:
      input: observation tensor.
      num_outputs: number of units of the linear output layer.
      scope: variable scope name.
      reuse: forwarded to `tf.compat.v1.variable_scope`.
      num_units: width of each of the two ReLU hidden layers.
      rnn_cell: not used by this function.

    Returns:
      The output tensor of the final linear layer.
    """
    with tf.compat.v1.variable_scope(scope, reuse=reuse):
        hidden = input
        for _ in range(2):
            hidden = layers.dense(hidden, units=num_units,
                                  activation=tf.compat.v1.nn.relu)
        return layers.dense(hidden, units=num_outputs, activation=None)
def reward_prediction_basic(prediction):
    """Simplest reward head: average over axes 1 and 2, then a dense layer.

    Args:
      prediction: the predicted image.

    Returns:
      Output of a 128-unit ReLU dense layer named "reward_pred" applied to
      the averaged values.
    """
    pooled = tf.reduce_mean(prediction, axis=[1, 2], keepdims=True)
    pooled = tf.squeeze(pooled, axis=[1, 2])
    return tfl.dense(pooled, 128, activation=tf.nn.relu, name="reward_pred")
def __call__(self, x, reuse=True):
    """Apply the dense stack described by `self.layers_dim` to `x`.

    One dense layer is created per entry of `self.layers_dim`, all with a
    Glorot-normal kernel initializer. The last layer uses `self.act_at` as
    its activation; every earlier layer uses a leaky ReLU.

    Args:
      x: input tensor.
      reuse: when truthy, mark scope `self.name` as reusing variables.

    Returns:
      The output tensor of the last dense layer (or `x` itself when
      `self.layers_dim` is empty).
    """
    with tf.variable_scope(self.name) as scope:
        if reuse:
            scope.reuse_variables()

        final_index = len(self.layers_dim) - 1
        hidden = x
        for index, width in enumerate(self.layers_dim):
            activation = (self.act_at if index == final_index
                          else tf.nn.leaky_relu)
            hidden = layers.dense(
                hidden,
                width,
                activation=activation,
                kernel_initializer=tf.keras.initializers.glorot_normal())
        return hidden
def get_q_values_op(self, state, scope, reuse=False):
    """
    Returns Q values for all actions using a linear approximation with bias.

    Args:
        state: (tf tensor)
            shape = (batch_size, img height, img width,
                     nchannels x config.state_history)
        scope: (string) scope name, that specifies if target network or not
        reuse: (bool) reuse of variables in the scope

    Returns:
        out: (tf tensor) of shape = (batch_size, num_actions)
    """
    num_actions = self.env.action_space.n

    # Flatten first so the single dense layer maps every pixel of a sample to
    # the action values. The dense layer must consume the flattened tensor:
    # feeding it `state` directly would only transform the last axis and
    # return a 4-D tensor instead of (batch_size, num_actions).
    flat_state = layers.flatten(state)
    # The layer is named after `scope`, so its variables live under that name.
    out = layers.dense(flat_state, units=num_actions, name=scope, reuse=reuse)

    return out
def __call__(self, z, reuse=True, c=None):
    """Apply the dense decoder described by `self.layers_dim` to `z`.

    `z` is passed through a `GaussianNoise(self.noise_z_std)` layer and then
    through one dense layer per entry of `self.layers_dim`. The last layer
    uses `self.act_out`, the first uses `self.act_at` (unless it is also the
    last), and all others use a leaky ReLU.

    Args:
      z: latent input tensor.
      reuse: when truthy, mark scope `self.name` as reusing variables.
      c: optional; when not None, the noised `z` is concatenated with `z`
        along axis 1 before the dense stack. The value of `c` itself is not
        read.

    Returns:
      The output tensor of the last dense layer.
    """
    with tf.variable_scope(self.name) as vs:
        if reuse:
            vs.reuse_variables()

        fc = tf.keras.layers.GaussianNoise(self.noise_z_std)(z)

        # Identity test: `c != None` on a tensor dispatches to the tensor's
        # comparison operator instead of checking for a missing argument.
        if c is not None:
            # Optionally concat noise(z) to z.
            # NOTE(review): `c` only acts as a switch here; if the intent was
            # to condition on `c`, this should concatenate `c` - confirm.
            fc = tf.concat([fc, z], axis=1)

        last_idx = len(self.layers_dim) - 1
        for idx, out_dim in enumerate(self.layers_dim):
            # Define activations.
            if idx == last_idx:
                act_fun = self.act_out
            elif idx == 0:
                act_fun = self.act_at
            else:
                act_fun = tf.nn.leaky_relu
            fc = layers.dense(
                fc,
                out_dim,
                activation=act_fun,
                kernel_initializer=tf.keras.initializers.glorot_normal())
        return fc
def __init__(self, state_size, action_size, learning_rate, name='DQLearner'):
    """Build the deep Q-network graph under variable scope `name`.

    The network is three conv -> batch-norm -> ELU stages, a 512-unit ELU
    dense layer and a 3-unit linear output. The predicted Q value is the sum
    of the output multiplied by the `actions_` placeholder, the loss is the
    mean squared difference to `target_Q`, and it is minimised with RMSProp.

    Args:
      state_size: iterable of input dimensions, unpacked after the batch
        axis of the `inputs` placeholder.
      action_size: stored on the instance.
      learning_rate: learning rate passed to the RMSProp optimizer.
      name: variable scope name.
    """
    self.state_size = state_size
    self.action_size = action_size
    self.learning_rate = learning_rate

    def conv_stage(inputs, index, filters, kernel_size, strides):
        # One "CNN -> BatchNormalization -> ELU" stage; returns all three
        # tensors so each can be exposed as an attribute.
        conv = v1l.conv2d(inputs=inputs,
                          filters=filters,
                          kernel_size=kernel_size,
                          strides=strides,
                          padding="VALID",
                          kernel_initializer=v1.initializers.glorot_uniform(),
                          name="conv%d" % index)
        normed = v1l.batch_normalization(conv,
                                         training=True,
                                         epsilon=1e-5,
                                         name='batch_norm%d' % index)
        activated = tf.nn.elu(normed, name="conv%d_out" % index)
        return conv, normed, activated

    with v1.variable_scope(name):
        # Placeholders. *state_size unpacks the tuple, e.g. [None, 84, 84, 4].
        self.inputs_ = v1.placeholder(tf.float32, [None, *state_size],
                                      name="inputs")
        # NOTE(review): the action count is hard-coded to 3 here and in the
        # output layer rather than taken from action_size - confirm.
        self.actions_ = v1.placeholder(tf.float32, [None, 3],
                                       name="actions_")
        # target_Q is R(s,a) + y * max Qhat(s', a')
        self.target_Q = v1.placeholder(tf.float32, [None], name="target")

        # Shape comments below assume an 84x84x4 input.
        (self.conv1,
         self.conv1_batchnorm,
         self.conv1_out) = conv_stage(self.inputs_, 1, 32, [8, 8], [4, 4])
        # --> [20, 20, 32]

        (self.conv2,
         self.conv2_batchnorm,
         self.conv2_out) = conv_stage(self.conv1_out, 2, 64, [4, 4], [2, 2])
        # --> [9, 9, 64]

        (self.conv3,
         self.conv3_batchnorm,
         self.conv3_out) = conv_stage(self.conv2_out, 3, 128, [4, 4], [2, 2])
        # --> [3, 3, 128]

        self.flatten = v1l.flatten(self.conv3_out)
        # --> [1152]

        self.fc = v1l.dense(
            inputs=self.flatten,
            units=512,
            activation=tf.nn.elu,
            kernel_initializer=v1.initializers.glorot_uniform(),
            name="fc1")

        self.output = v1l.dense(
            inputs=self.fc,
            kernel_initializer=v1.initializers.glorot_uniform(),
            units=3,
            activation=None)

        # Q is our predicted Q value for the action encoded in actions_.
        weighted = tf.math.multiply(self.output, self.actions_)
        self.Q = tf.math.reduce_sum(weighted, axis=1)

        # Loss: mean over the batch of (Qtarget - Q)^2.
        squared_error = tf.math.square(self.target_Q - self.Q)
        self.loss = tf.math.reduce_mean(squared_error)

        self.optimizer = v1.train.RMSPropOptimizer(
            self.learning_rate).minimize(self.loss)
def mlp(x, hidden_sizes, activation=tf.nn.relu, output_activation=None):
    """Stack dense layers on `x`, one per entry of `hidden_sizes`.

    Every layer except the last uses `activation`; the last layer, whose
    width is `hidden_sizes[-1]`, uses `output_activation`.
    """
    out = x
    for width in hidden_sizes[:-1]:
        out = dense(out, width, activation=activation)
    return dense(out, hidden_sizes[-1], activation=output_activation)
def __init__(self, myScope, h_size, agent, env, trace_length, batch_size,
             reuse=None, step=False):
    """Build a recurrent policy/value network under scope `myScope`.

    A two-layer conv encoder (sub-scope 'input_proc') feeds a linear value
    head and, through an LSTM (scope 'rnn'), a linear policy head with
    `env.NUM_ACTIONS` outputs. Placeholders, the TD loss, flat parameter
    accessors and, when `step` is false, the log-probability tensors and a
    gradient-descent op for the value parameters are stored on the instance.

    Args:
      myScope: variable scope name; also used to collect trainable variables.
      h_size: LSTM hidden size (the state width is `h_size * 2`).
      agent: not used by this constructor.
      env: provides `ob_space_shape` (a list) and `NUM_ACTIONS`.
      trace_length: sequence length; forced to 1 when `step` is true.
      batch_size: number of sequences per batch.
      reuse: forwarded to the variable scopes.
      step: when true, build the single-step variant fed by `lstm_state`.
    """
    if step:
        trace_length = 1

    with tf.variable_scope(myScope, reuse=reuse):
        self.batch_size = batch_size

        zero_state = tf.zeros((batch_size, h_size * 2), dtype=tf.float32)
        self.gamma_array = tf.placeholder(shape=[1, trace_length],
                                          dtype=tf.float32,
                                          name='gamma_array')
        self.gamma_array_inverse = tf.placeholder(shape=[1, trace_length],
                                                  dtype=tf.float32,
                                                  name='gamma_array_inv')
        self.lstm_state = tf.placeholder(shape=[batch_size, h_size * 2],
                                         dtype=tf.float32,
                                         name='lstm_state')

        # Single-step mode resumes from the fed LSTM state; sequence mode
        # always starts from zeros.
        if step:
            input_shape = [self.batch_size] + env.ob_space_shape
        else:
            input_shape = [batch_size * trace_length] + env.ob_space_shape
        self.state_input = tf.placeholder(shape=input_shape,
                                          dtype=tf.float32,
                                          name='state_input')
        initial_state = self.lstm_state if step else zero_state

        self.sample_return = tf.placeholder(shape=[None, trace_length],
                                            dtype=tf.float32,
                                            name='sample_return')
        self.sample_reward = tf.placeholder(shape=[None, trace_length],
                                            dtype=tf.float32,
                                            name='sample_reward')

        with tf.variable_scope('input_proc', reuse=reuse):
            features = self.state_input
            for _ in range(2):
                features = layers.conv2d(features,
                                         kernel_size=(3, 3),
                                         filters=20,
                                         activation=tf.nn.relu,
                                         padding='same')
            features = layers.flatten(features)
            print('values', features.get_shape())
            value_flat = layers.dense(tf.nn.relu(features), 1)
            self.value = tf.reshape(value_flat, [-1, trace_length])

        # trace_length is already 1 in step mode, so one call covers both.
        feature_seq = batch_to_seq(features, self.batch_size, trace_length)
        feature_seq, state_output = lstm(feature_seq, initial_state,
                                         scope='rnn', nh=h_size)
        recurrent = seq_to_batch(feature_seq)

        logits = layers.dense(recurrent, units=env.NUM_ACTIONS,
                              activation=None)
        self.log_pi = tf.nn.log_softmax(logits)
        self.lstm_state_output = state_output

        self.actions = tf.placeholder(shape=[None], dtype=tf.int32,
                                      name='actions')
        self.actions_onehot = tf.one_hot(self.actions, env.NUM_ACTIONS,
                                         dtype=tf.float32)

        sampled = tf.multinomial(self.log_pi, 1)
        self.predict = tf.squeeze(sampled)

        self.next_value = tf.placeholder(shape=[None, 1], dtype=tf.float32,
                                         name='next_value')
        self.next_v = tf.matmul(self.next_value, self.gamma_array_inverse)
        self.target = self.sample_return + self.next_v

        self.td_error = tf.square(self.target - self.value) / 2
        self.loss = tf.reduce_mean(self.td_error)

    self.parameters = []
    self.value_params = []
    trainable = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                  scope=myScope)
    for variable in trainable:
        if 'value_params' not in variable.name:
            self.parameters.append(variable)
        if 'input_proc' in variable.name:
            self.value_params.append(variable)

    if not step:
        masked_log_pi = tf.multiply(self.log_pi, self.actions_onehot)
        self.log_pi_action = tf.reduce_mean(masked_log_pi,
                                            reduction_indices=1)
        self.log_pi_action_bs = tf.reduce_sum(
            tf.reshape(self.log_pi_action, [-1, trace_length]), 1)
        self.log_pi_action_bs_t = tf.reshape(
            self.log_pi_action, [self.batch_size, trace_length])
        self.trainer = tf.train.GradientDescentOptimizer(learning_rate=1)
        self.updateModel = self.trainer.minimize(
            self.loss, var_list=self.value_params)

    self.setparams = SetFromFlat(self.parameters)
    self.getparams = GetFlat(self.parameters)
    self.param_len = len(self.parameters)

    for variable in self.parameters:
        print(variable.name, variable.get_shape())
Y_truth = placeholder(tf.float64, shape=(None, n_labels))


# Graph convolution layer: H^(l+1) = sigma(A (H^l W^l + b^l)),
# with the weights W^l and the bias b^l supplied by the tf dense layer.
def graph_conv(_X, _A, O):
    """
    Equation of graph convolution.
    _X: vector X. Nodes.
    _A: adjacency matrix. Edges or path.
    O: number of output units of the dense projection.
    """
    out = dense(_X, units=O, use_bias=True)
    out = tf.matmul(_A, out)
    out = tf.nn.relu(out)
    return out


X_new = graph_conv(X, A, 32)
print(X_new)

# Three stacked graph convolutions followed by a softmax read-out.
gconv1 = graph_conv(X, A, 32)
gconv2 = graph_conv(gconv1, A, 32)
gconv3 = graph_conv(gconv2, A, 32)
Y_pred = tf.nn.softmax(dense(gconv3, units=n_labels, use_bias=True), axis=2)
print(Y_pred)

Y_pred = tf.reshape(Y_pred, [-1])
# Small epsilon keeps log() finite when a probability is 0. The previous
# `1.0 ** -5` evaluated to 1.0, which turned this into log(p + 1).
# NOTE(review): this is mean(y * log(p)) without the leading minus sign of a
# cross-entropy, and Y_pred is flattened while Y_truth is 2-D - confirm how
# the value is consumed downstream.
loss = tf.reduce_mean(Y_truth * tf.math.log(Y_pred + 1e-5))
print(loss)