def __init__(self, input_dim, action_dim):
    super(StochasticActor, self).__init__()
    self.mu = tf.keras.Sequential([
        tf.layers.Dense(
            units=64,
            activation='tanh',
            kernel_initializer=tf.orthogonal_initializer(),
            input_shape=(input_dim,)),
        tf.layers.Dense(
            units=64,
            activation='tanh',
            kernel_initializer=tf.orthogonal_initializer()),
        tf.layers.Dense(
            units=action_dim,
            activation=None,
            kernel_initializer=tf.orthogonal_initializer(0.01))
    ])
    # We exponentiate logsig to obtain sigma, so no softplus is needed.
    self.logsig = tf.get_variable(
        name='logsig',
        shape=[1, action_dim],
        dtype=tf.float32,
        initializer=tf.zeros_initializer(),
        trainable=True)
def __init__(self, input_dim):
    super(Critic, self).__init__()
    self.main = tf.keras.Sequential([
        tf.layers.Dense(units=64, input_shape=(input_dim,), activation='tanh',
                        kernel_initializer=tf.orthogonal_initializer()),
        tf.layers.Dense(units=64, activation='tanh',
                        kernel_initializer=tf.orthogonal_initializer()),
        tf.layers.Dense(units=1, activation=None,
                        kernel_initializer=tf.orthogonal_initializer())
    ])
def dense(self, inputs, output_size, scope="dense", use_bias=True, activation=None):
    inputs = tf.convert_to_tensor(inputs)
    shape = inputs.get_shape().as_list()
    last_dim = shape[-1]
    rank = len(shape)
    with tf.variable_scope(name_or_scope=scope):
        # Create the kernel and bias inside the layer's variable scope so that
        # repeated calls with different scopes do not collide on 'W'/'b'.
        kernel = tf.get_variable(shape=(last_dim, output_size),
                                 initializer=tf.orthogonal_initializer(),
                                 name='W')
        bias = tf.get_variable(shape=(output_size,),
                               initializer=tf.zeros_initializer(),
                               name='b')
        if rank > 2:
            # Broadcasting is required for inputs with rank > 2.
            outputs = tf.tensordot(inputs, kernel, [[rank - 1], [0]])
        else:
            outputs = tf.matmul(inputs, kernel)
        if use_bias:
            outputs = tf.nn.bias_add(outputs, bias)
        if activation is not None:
            return activation(outputs)  # pylint: disable=not-callable
        return outputs
def _prediction_network(self, obs):
    """Prediction network used by RND to predict the target network output."""
    with slim.arg_scope(
            [slim.conv2d, slim.fully_connected],
            weights_initializer=tf.orthogonal_initializer(gain=np.sqrt(2)),
            biases_initializer=tf.zeros_initializer()):
        net = slim.conv2d(obs, 32, [8, 8], stride=4, activation_fn=tf.nn.leaky_relu)
        net = slim.conv2d(net, 64, [4, 4], stride=2, activation_fn=tf.nn.leaky_relu)
        net = slim.conv2d(net, 64, [3, 3], stride=1, activation_fn=tf.nn.leaky_relu)
        net = slim.flatten(net)
        net = slim.fully_connected(net, 512, activation_fn=tf.nn.relu)
        net = slim.fully_connected(net, 512, activation_fn=tf.nn.relu)
        embedding = slim.fully_connected(net, self.embedding_size, activation_fn=None)
    return embedding
def fully_connected(self, inputs, output_size, scope="full_connected", is_activation=None):
    # Get the number of input features.
    shape = inputs.get_shape().as_list()
    if len(shape) == 4:
        # Input comes from a convolutional layer: flatten H * W * C.
        input_size = shape[-1] * shape[-2] * shape[-3]
    else:
        # Input comes from a dense layer.
        input_size = shape[1]
    with tf.variable_scope(name_or_scope=scope):
        flat_data = tf.reshape(tensor=inputs, shape=[-1, input_size], name='flatten')
        weights = tf.get_variable(shape=(input_size, output_size),
                                  initializer=tf.orthogonal_initializer(),
                                  name='W')
        biases = tf.get_variable('b', shape=(output_size,),
                                 initializer=tf.zeros_initializer())
        if is_activation is not None:
            # Apply the ReLU fused layer to the flattened input.
            return tf.nn.relu_layer(x=flat_data, weights=weights, biases=biases)
        return tf.nn.bias_add(value=tf.matmul(flat_data, weights), bias=biases)
def get_variable_initializer(hparams):
    """Get variable initializer from hparams."""
    if not hparams.initializer:
        return None

    mlperf_log.transformer_print(key=mlperf_log.MODEL_HP_INITIALIZER_GAIN,
                                 value=hparams.initializer_gain,
                                 hparams=hparams)

    if not tf.executing_eagerly():
        tf.logging.info("Using variable initializer: %s", hparams.initializer)
    if hparams.initializer == "orthogonal":
        return tf.orthogonal_initializer(gain=hparams.initializer_gain)
    elif hparams.initializer == "uniform":
        max_val = 0.1 * hparams.initializer_gain
        return tf.random_uniform_initializer(-max_val, max_val)
    elif hparams.initializer == "normal_unit_scaling":
        return tf.variance_scaling_initializer(
            hparams.initializer_gain, mode="fan_avg", distribution="normal")
    elif hparams.initializer == "uniform_unit_scaling":
        return tf.variance_scaling_initializer(
            hparams.initializer_gain, mode="fan_avg", distribution="uniform")
    elif hparams.initializer == "xavier":
        return tf.initializers.glorot_uniform()
    else:
        raise ValueError("Unrecognized initializer: %s" % hparams.initializer)
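# Hedged usage sketch (not part of the original source): an initializer produced this
# way is usually installed as the default on a variable scope, so every
# tf.get_variable call inside inherits it. The scope and variable names below are
# illustrative only.
def _initializer_usage_example(hparams):
    init = get_variable_initializer(hparams)  # e.g. tf.orthogonal_initializer(gain=...)
    with tf.variable_scope('body', initializer=init):
        # Inherits the scope's default initializer.
        return tf.get_variable('w', shape=[128, 256])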
def conv_layer(inputs, filters, kernel_size, strides, gain=1.0):
    return tf.layers.conv2d(inputs=inputs,
                            filters=filters,
                            kernel_size=kernel_size,
                            strides=(strides, strides),
                            activation=tf.nn.relu,
                            kernel_initializer=tf.orthogonal_initializer(gain=gain))
def head(endpoints, embedding_dim, is_training):
    endpoints['emb'] = endpoints['emb_raw'] = slim.fully_connected(
        endpoints['model_output'], embedding_dim, activation_fn=None,
        weights_initializer=tf.orthogonal_initializer(), scope='emb')
    return endpoints
def _create_conv2d_initializer(input_shape, output_channels, kernel_shape,
                               dtype=tf.float32):  # pylint: disable=unused-argument
    """Returns a default initializer for the weights of a convolutional module."""
    return {
        'w': tf.orthogonal_initializer(),
        'b': tf.zeros_initializer(dtype=dtype),
    }
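# Hedged, self-contained sketch (not part of the original source): a quick numerical
# check of what tf.orthogonal_initializer produces for a tall weight matrix, namely
# columns that are orthonormal up to the gain, so W^T W is approximately gain^2 * I.
# The variable name is illustrative only.
def _orthogonality_check(rows=64, cols=32, gain=1.0):
    w = tf.get_variable('w_ortho_check', shape=[rows, cols],
                        initializer=tf.orthogonal_initializer(gain=gain))
    gram = tf.matmul(w, w, transpose_a=True)  # [cols, cols], ~ gain^2 * identity
    return gram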
def __init__(self, input_dim):
    """Initializes a critic network.

    Args:
      input_dim: size of the input space
    """
    super(CriticDDPG, self).__init__()
    self.main = tf.keras.Sequential([
        tf.layers.Dense(units=400, input_shape=(input_dim,), activation='relu',
                        kernel_initializer=tf.orthogonal_initializer()),
        tf.layers.Dense(units=300, activation='relu',
                        kernel_initializer=tf.orthogonal_initializer()),
        tf.layers.Dense(units=1,
                        kernel_initializer=tf.orthogonal_initializer())
    ])
def _target_network(self, obs):
    """Implements the random target network used by RND."""
    with slim.arg_scope(
            [slim.conv2d, slim.fully_connected],
            trainable=False,
            weights_initializer=tf.orthogonal_initializer(gain=np.sqrt(2)),
            biases_initializer=tf.zeros_initializer()):
        net = slim.conv2d(obs, 32, [8, 8], stride=4, activation_fn=tf.nn.leaky_relu)
        net = slim.conv2d(net, 64, [4, 4], stride=2, activation_fn=tf.nn.leaky_relu)
        net = slim.conv2d(net, 64, [3, 3], stride=1, activation_fn=tf.nn.leaky_relu)
        net = slim.flatten(net)
        embedding = slim.fully_connected(net, self.embedding_size, activation_fn=None)
    return embedding
def head(endpoints, embedding_dim, is_training):
    endpoints['head_output'] = slim.fully_connected(
        endpoints['model_output'], 1024,
        normalizer_fn=slim.batch_norm,
        normalizer_params={
            'decay': 0.9,
            'epsilon': 1e-5,
            'scale': True,
            'is_training': is_training,
            'updates_collections': tf.GraphKeys.UPDATE_OPS,
        })
    endpoints['emb'] = endpoints['emb_raw'] = slim.fully_connected(
        endpoints['head_output'], embedding_dim, activation_fn=None,
        weights_initializer=tf.orthogonal_initializer(), scope='emb')
    return endpoints
def _create_linear_initializer(input_size, output_size,
                               dtype=tf.float32):  # pylint: disable=unused-argument
    """Returns a default initializer for the weights of a linear module."""
    return {
        'w': tf.orthogonal_initializer(),
        'b': tf.zeros_initializer(dtype=dtype),
    }
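# Hedged sketch (not from the original source): the {'w': ..., 'b': ...} map above is
# a module-style initializer dict; the same initializers can also be fed directly to
# tf.get_variable. The helper and variable names below are illustrative only.
def _example_linear(inputs, output_size):
    input_size = inputs.get_shape().as_list()[-1]
    inits = _create_linear_initializer(input_size, output_size)
    w = tf.get_variable('w', shape=[input_size, output_size], initializer=inits['w'])
    b = tf.get_variable('b', shape=[output_size], initializer=inits['b'])
    return tf.nn.bias_add(tf.matmul(inputs, w), b)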
def _build_seperate(self, hp):
    # Input, target output, and cost mask.
    # Shape: [Time, Batch, Num_units]
    n_input = hp['n_input']
    n_rnn = hp['n_rnn']
    n_output = hp['n_output']

    self.x = tf.placeholder("float", [None, None, n_input])
    self.y = tf.placeholder("float", [None, None, n_output])
    self.c_mask = tf.placeholder("float", [None, n_output])

    sensory_inputs, rule_inputs = tf.split(
        self.x, [hp['rule_start'], hp['n_rule']], axis=-1)

    sensory_rnn_inputs = tf.layers.dense(sensory_inputs, n_rnn, name='sen_input')

    if 'mix_rule' in hp and hp['mix_rule'] is True:
        # Rotate the rule matrix with a fixed random orthogonal transform.
        kernel_initializer = tf.orthogonal_initializer()
        rule_inputs = tf.layers.dense(
            rule_inputs, hp['n_rule'], name='mix_rule', use_bias=False,
            trainable=False, kernel_initializer=kernel_initializer)

    rule_rnn_inputs = tf.layers.dense(rule_inputs, n_rnn, name='rule_input',
                                      use_bias=False)
    rnn_inputs = sensory_rnn_inputs + rule_rnn_inputs

    # Recurrent activity.
    cell = LeakyRNNCellSeparateInput(n_rnn, hp['alpha'],
                                     sigma_rec=hp['sigma_rec'],
                                     activation=hp['activation'],
                                     w_rec_init=hp['w_rec_init'],
                                     rng=self.rng)

    # Dynamic rnn with time major.
    self.h, states = rnn.dynamic_rnn(cell, rnn_inputs, dtype=tf.float32,
                                     time_major=True)

    # Output.
    h_shaped = tf.reshape(self.h, (-1, n_rnn))
    y_shaped = tf.reshape(self.y, (-1, n_output))
    # y_hat shape (n_time*n_batch, n_unit).
    y_hat = tf.layers.dense(h_shaped, n_output, activation=tf.nn.sigmoid,
                            name='output')

    # Least-square loss.
    self.cost_lsq = tf.reduce_mean(tf.square((y_shaped - y_hat) * self.c_mask))

    self.y_hat = tf.reshape(y_hat, (-1, tf.shape(self.h)[1], n_output))
    y_hat_fix, y_hat_ring = tf.split(self.y_hat, [1, n_output - 1], axis=-1)
    self.y_hat_loc = tf_popvec(y_hat_ring)
def dcgan_discriminator(x, flags, scope=None, reuse=None, return_acts=False):
    """DCGAN-style discriminator network."""
    nonlinearity = nonlinearity_fn(flags.nonlinearity_d, True)
    ds_fs = flags.downsample_conv_filt_size
    x_fs = flags.extra_conv_filt_size
    acts = []

    with tf.variable_scope(scope, reuse=reuse):
        if not flags.norm_d:
            normalizer = None
        elif flags.algorithm == 'vanilla':
            normalizer = contrib_slim.batch_norm
        else:
            normalizer = contrib_slim.layer_norm

        if flags.initializer_d == 'xavier':
            initializer = contrib_layers.xavier_initializer()
        elif flags.initializer_d == 'orth_gain2':
            initializer = tf.orthogonal_initializer(gain=2.)
        elif flags.initializer_d == 'he':
            initializer = contrib_layers.variance_scaling_initializer()
        elif flags.initializer_d == 'he_uniform':
            initializer = contrib_layers.variance_scaling_initializer(uniform=True)

        out = contrib_slim.conv2d(x, flags.dim_d, ds_fs, scope='conv1', stride=2,
                                  activation_fn=nonlinearity,
                                  weights_initializer=initializer)
        acts.append(out)
        for i in range(flags.extra_depth_d):
            out = contrib_slim.conv2d(out, flags.dim_d, x_fs,
                                      scope='extraconv1.{}'.format(i),
                                      activation_fn=nonlinearity,
                                      normalizer_fn=normalizer,
                                      weights_initializer=initializer)
            acts.append(out)

        out = contrib_slim.conv2d(out, 2 * flags.dim_d, ds_fs, scope='conv2', stride=2,
                                  activation_fn=nonlinearity,
                                  normalizer_fn=normalizer,
                                  weights_initializer=initializer)
        acts.append(out)
        for i in range(flags.extra_depth_d):
            out = contrib_slim.conv2d(out, 2 * flags.dim_d, x_fs,
                                      scope='extraconv2.{}'.format(i),
                                      activation_fn=nonlinearity,
                                      normalizer_fn=normalizer,
                                      weights_initializer=initializer)
            acts.append(out)

        out = contrib_slim.conv2d(out, 4 * flags.dim_d, ds_fs, scope='conv3', stride=2,
                                  activation_fn=nonlinearity,
                                  normalizer_fn=normalizer,
                                  weights_initializer=initializer)
        acts.append(out)

        if flags.extra_top_conv:
            out = contrib_slim.conv2d(out, 4 * flags.dim_d, x_fs, scope='extratopconv',
                                      activation_fn=nonlinearity,
                                      normalizer_fn=normalizer,
                                      weights_initializer=initializer)
            acts.append(out)

        out = tf.reshape(out, [-1, 4 * 4 * (4 * flags.dim_d)])
        out = contrib_slim.fully_connected(out, 1, scope='fc', activation_fn=None)
        acts.append(out)

        if return_acts:
            return out, acts
        else:
            return out
def get_rnn_cell(mode, hps, input_dim, num_units, num_layers=1, dropout=0.,
                 mem_input=None, use_beam=False, cell_type="lstm", reuse=None):
    """Construct RNN cells.

    Args:
      mode: train or eval. Keys from tf.estimator.ModeKeys.
      hps: Hyperparameters.
      input_dim: input size.
      num_units: hidden state size.
      num_layers: number of RNN layers.
      dropout: drop rate of RNN dropout.
      mem_input: mem_input.
      use_beam: Use beam search or not.
      cell_type: one of [`lstm`, `gru`, `hyper_lstm`].
      reuse: Reuse option.

    Returns:
      RNN cell.
    """
    cells = []
    for i in xrange(num_layers):
        input_size = input_dim if i == 0 else num_units
        scale = 1.
        if cell_type == "lstm":
            cell = tf.contrib.rnn.LSTMCell(
                num_units=num_units,
                initializer=tf.orthogonal_initializer(scale),
                reuse=reuse)
        elif cell_type == "gru":
            cell = tf.contrib.rnn.GRUCell(
                num_units=num_units,
                kernel_initializer=tf.orthogonal_initializer(scale),
                reuse=reuse)
        elif cell_type == "hyper_lstm":
            cell = HyperLSTMCell(
                num_units=num_units,
                mem_input=mem_input,
                use_beam=use_beam,
                initializer=tf.orthogonal_initializer(scale),
                hps=hps,
                reuse=reuse)
        else:
            assert False
        if mode == tf_estimator.ModeKeys.TRAIN and dropout > 0.:
            cell = tf.nn.rnn_cell.DropoutWrapper(
                cell,
                input_size=input_size,
                output_keep_prob=1.0 - dropout,
                variational_recurrent=True,
                dtype=tf.float32)
        if hps.use_residual and num_layers > 1:
            cell = tf.nn.rnn_cell.ResidualWrapper(cell=cell)
        cells.append(cell)
    cell = tf.nn.rnn_cell.MultiRNNCell(cells)
    return cell
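# Hedged usage sketch (not from the original source): wiring the constructed cell
# into an unrolled RNN. `mode`, `hps`, and `token_embeddings` are hypothetical
# inputs with the roles expected by get_rnn_cell above.
def _rnn_cell_usage_example(mode, hps, token_embeddings):
    # token_embeddings: [batch, time, input_dim]
    cell = get_rnn_cell(mode, hps,
                        input_dim=token_embeddings.shape[-1].value,
                        num_units=512, num_layers=2, dropout=0.1,
                        cell_type="lstm")
    outputs, state = tf.nn.dynamic_rnn(cell, token_embeddings, dtype=tf.float32)
    return outputs, state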
def fc_layer(inputs, units, activations_fn=tf.nn.relu, gain=1.0):
    return tf.layers.dense(inputs=inputs,
                           units=units,
                           activation=activations_fn,
                           kernel_initializer=tf.orthogonal_initializer(gain))
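# Hedged sketch (not part of the original source), assuming conv_layer (defined
# earlier) and fc_layer live in the same module and numpy is imported as np: a small
# Nature-CNN-style trunk built from the two helpers, using the sqrt(2) gain commonly
# paired with ReLU layers.
def _example_cnn_trunk(obs):
    # obs: [batch, height, width, channels] float input.
    h = conv_layer(obs, filters=32, kernel_size=8, strides=4, gain=np.sqrt(2))
    h = conv_layer(h, filters=64, kernel_size=4, strides=2, gain=np.sqrt(2))
    h = conv_layer(h, filters=64, kernel_size=3, strides=1, gain=np.sqrt(2))
    h = tf.layers.flatten(h)
    return fc_layer(h, units=512, gain=np.sqrt(2))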
from __future__ import division
from __future__ import print_function

from model import HierarchicalProbUNet
import tensorflow.compat.v1 as tf

_NUM_CLASSES = 2
_BATCH_SIZE = 2
_SPATIAL_SHAPE = [32, 32]
_CHANNELS_PER_BLOCK = [5, 7, 9, 11, 13]
_IMAGE_SHAPE = [_BATCH_SIZE] + _SPATIAL_SHAPE + [1]
_BOTTLENECK_SIZE = _SPATIAL_SHAPE[0] // 2 ** (len(_CHANNELS_PER_BLOCK) - 1)
_SEGMENTATION_SHAPE = [_BATCH_SIZE] + _SPATIAL_SHAPE + [_NUM_CLASSES]
_LATENT_DIMS = [3, 2, 1]
_INITIALIZERS = {'w': tf.orthogonal_initializer(gain=1.0, seed=None),
                 'b': tf.truncated_normal_initializer(stddev=0.001)}


def _get_placeholders():
    """Returns placeholders for the image and segmentation."""
    img = tf.placeholder(dtype=tf.float32, shape=_IMAGE_SHAPE)
    seg = tf.placeholder(dtype=tf.float32, shape=_SEGMENTATION_SHAPE)
    return img, seg


class HierarchicalProbUNetTest(tf.test.TestCase):

    def test_shape_of_sample(self):
        hpu_net = HierarchicalProbUNet(latent_dims=_LATENT_DIMS,
                                       channels_per_block=_CHANNELS_PER_BLOCK,
def __init__(self, num_unique_documents, vocab_size, num_topics, freqs,
             embedding_size=128, num_sampled=40, learning_rate=1e-3,
             lmbda=150.0, alpha=None, power=0.75, batch_size=32,
             clip_gradients=5.0, **kwargs):
    device = get_device(**kwargs)
    _graph = tf.Graph()
    with _graph.as_default():
        with tf.device(device):
            moving_avgs = tf.train.ExponentialMovingAverage(0.9)
            self.batch_size = batch_size
            self.freqs = freqs

            self.X = tf.placeholder(tf.int32, shape=[None])
            self.Y = tf.placeholder(tf.int64, shape=[None])
            self.DOC = tf.placeholder(tf.int32, shape=[None])
            self.switch_loss = tf.Variable(0, trainable=False)

            train_labels = tf.reshape(self.Y, [-1, 1])
            sampler = tf.nn.fixed_unigram_candidate_sampler(
                train_labels,
                num_true=1,
                num_sampled=num_sampled,
                unique=True,
                range_max=vocab_size,
                distortion=power,
                unigrams=self.freqs,
            )

            self.word_embedding = tf.Variable(
                tf.random_uniform([vocab_size, embedding_size], -1.0, 1.0))
            self.nce_weights = tf.Variable(
                tf.truncated_normal(
                    [vocab_size, embedding_size],
                    stddev=tf.sqrt(1 / embedding_size),
                ))
            self.nce_biases = tf.Variable(tf.zeros([vocab_size]))

            scalar = 1 / np.sqrt(num_unique_documents + num_topics)
            self.doc_embedding = tf.Variable(
                tf.random_normal(
                    [num_unique_documents, num_topics],
                    mean=0,
                    stddev=50 * scalar,
                ))
            self.topic_embedding = tf.get_variable(
                'topic_embedding',
                shape=[num_topics, embedding_size],
                dtype=tf.float32,
                initializer=tf.orthogonal_initializer(gain=scalar),
            )

            pivot = tf.nn.embedding_lookup(self.word_embedding, self.X)
            proportions = tf.nn.embedding_lookup(self.doc_embedding, self.DOC)
            doc = tf.matmul(proportions, self.topic_embedding)
            doc_context = doc
            word_context = pivot
            context = tf.add(word_context, doc_context)

            loss_word2vec = tf.reduce_mean(
                tf.nn.nce_loss(
                    weights=self.nce_weights,
                    biases=self.nce_biases,
                    labels=self.Y,
                    inputs=context,
                    num_sampled=num_sampled,
                    num_classes=vocab_size,
                    num_true=1,
                    sampled_values=sampler,
                ))
            self.fraction = tf.Variable(1, trainable=False, dtype=tf.float32)

            n_topics = self.doc_embedding.get_shape()[1].value
            log_proportions = tf.nn.log_softmax(self.doc_embedding)
            if alpha is None:
                alpha = 1.0 / n_topics
            loss = (alpha - 1) * log_proportions
            prior = tf.reduce_sum(loss)
            loss_lda = lmbda * self.fraction * prior

            global_step = tf.Variable(0, trainable=False, name='global_step')
            self.cost = tf.cond(
                global_step < self.switch_loss,
                lambda: loss_word2vec,
                lambda: loss_word2vec + loss_lda,
            )

            loss_avgs_op = moving_avgs.apply([loss_lda, loss_word2vec, self.cost])
            with tf.control_dependencies([loss_avgs_op]):
                optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
                gvs = optimizer.compute_gradients(self.cost)
                capped_gvs = [(
                    tf.clip_by_value(grad, -clip_gradients, clip_gradients),
                    var,
                ) for grad, var in gvs]
                self.optimizer = optimizer.apply_gradients(capped_gvs)

        self.sess = generate_session(_graph, **kwargs)
        self.sess.run(tf.global_variables_initializer())
def biaffine_mapping(vector_set_1,
                     vector_set_2,
                     output_size,
                     add_bias_1=True,
                     add_bias_2=True,
                     initializer=None):
    """Bilinear mapping: maps two vector spaces to a third vector space.

    The input vector spaces are two 3d matrices: batch size x bucket size x values.
    A typical application of the function is to compute a square matrix
    representing a dependency tree. The output is, for each bucket, a square matrix
    of the form [bucket size, output size, bucket size]. If the output size is set
    to 1, the result is [bucket size, 1, bucket size], equivalent to a square
    matrix in which the bucket represents, for instance, the tokens on the x-axis
    and y-axis. In this way it represents the adjacency matrix of a dependency
    graph (see https://arxiv.org/abs/1611.01734).

    Args:
      vector_set_1: vectors of space one
      vector_set_2: vectors of space two
      output_size: number of output labels (e.g. edge labels)
      add_bias_1: Whether to add a bias for input one
      add_bias_2: Whether to add a bias for input two
      initializer: Initializer for the bilinear weight map

    Returns:
      Output vector space as 4d matrix:
      batch size x bucket size x output size x bucket size
      The output could represent an unlabeled dependency tree when the output size
      is 1, or a labeled tree otherwise.
    """
    with tf.variable_scope('Bilinear'):
        # Dynamic shape info
        batch_size = tf.shape(vector_set_1)[0]
        bucket_size = tf.shape(vector_set_1)[1]

        if add_bias_1:
            vector_set_1 = tf.concat(
                [vector_set_1, tf.ones([batch_size, bucket_size, 1])], axis=2)
        if add_bias_2:
            vector_set_2 = tf.concat(
                [vector_set_2, tf.ones([batch_size, bucket_size, 1])], axis=2)

        # Static shape info
        vector_set_1_size = vector_set_1.get_shape().as_list()[-1]
        vector_set_2_size = vector_set_2.get_shape().as_list()[-1]

        if not initializer:
            initializer = tf.orthogonal_initializer()

        # Mapping matrix
        bilinear_map = tf.get_variable(
            'bilinear_map',
            [vector_set_1_size, output_size, vector_set_2_size],
            initializer=initializer)

        # The matrix operations and reshapings for bilinear mapping.
        # b: batch size (batch of buckets)
        # v1, v2: values (size of vectors)
        # n: tokens (size of bucket)
        # r: labels (output size), e.g. 1 if unlabeled or number of edge labels.

        # [b, n, v1] -> [b*n, v1]
        vector_set_1 = tf.reshape(vector_set_1, [-1, vector_set_1_size])

        # [v1, r, v2] -> [v1, r*v2]
        bilinear_map = tf.reshape(bilinear_map, [vector_set_1_size, -1])

        # [b*n, v1] x [v1, r*v2] -> [b*n, r*v2]
        bilinear_mapping = tf.matmul(vector_set_1, bilinear_map)

        # [b*n, r*v2] -> [b, n*r, v2]
        bilinear_mapping = tf.reshape(
            bilinear_mapping,
            [batch_size, bucket_size * output_size, vector_set_2_size])

        # [b, n*r, v2] x [b, n, v2]T -> [b, n*r, n]
        bilinear_mapping = tf.matmul(bilinear_mapping, vector_set_2, adjoint_b=True)

        # [b, n*r, n] -> [b, n, r, n]
        bilinear_mapping = tf.reshape(
            bilinear_mapping,
            [batch_size, bucket_size, output_size, bucket_size])
        return bilinear_mapping
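# Hedged usage sketch (not part of the original source): scoring head->dependent
# arcs for a batch of sentences. `head_repr` and `dep_repr` are hypothetical
# [batch, length, dim] encoder outputs.
def _example_arc_scores(head_repr, dep_repr):
    # With output_size=1 the result is [batch, length, 1, length].
    scores = biaffine_mapping(head_repr, dep_repr, output_size=1)
    # Drop the singleton label axis -> [batch, length, length].
    return tf.squeeze(scores, axis=2)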
def _build_net(self):
    with tf.variable_scope("Actor" + self.suffix):
        with tf.name_scope('inputs' + self.suffix):
            self.tf_obs = tf.placeholder(tf.float32, [None, self.n_features],
                                         name='observation' + self.suffix)
            self.tf_acts = tf.placeholder(tf.int32, [None, ],
                                          name='actions_num' + self.suffix)
            self.tf_vt = tf.placeholder(tf.float32, [None, ],
                                        name='actions_value' + self.suffix)
            self.tf_safe = tf.placeholder(tf.float32, [None, ],
                                          name='safety_value' + self.suffix)
            self.entropy_weight = tf.placeholder(
                tf.float32, shape=(),
                name='entropy_weight_clustering' + self.suffix)

            ##### PPO change #####
            self.ppo_ratio = tf.placeholder(tf.float32, [None, ],
                                            name='ppo_ratio' + self.suffix)
            ##### PPO change #####

        layer = tf.layers.dense(
            inputs=self.tf_obs,
            units=128,
            activation=tf.nn.tanh,
            # kernel_initializer=tf.random_normal_initializer(mean=0, stddev=0.3),
            kernel_initializer=tf.orthogonal_initializer(
                gain=np.sqrt(2.)),  # ppo default initialization
            bias_initializer=tf.constant_initializer(0.1),
            name='fc1' + self.suffix)

        all_act = tf.layers.dense(
            inputs=layer,
            units=self.n_actions,
            activation=None,
            # kernel_initializer=tf.random_normal_initializer(mean=0, stddev=0.3),
            kernel_initializer=tf.orthogonal_initializer(
                gain=np.sqrt(2.)),  # ppo default initialization
            bias_initializer=tf.constant_initializer(0.1),
            name='fc2' + self.suffix)

        self.trainable_variables = tf.get_collection(
            tf.GraphKeys.TRAINABLE_VARIABLES, scope='Actor' + self.suffix)
        self.trainable_variables_shapes = [
            var.get_shape().as_list() for var in self.trainable_variables
        ]

        # sampling
        self.all_act_prob = tf.nn.softmax(all_act, name='act_prob' + self.suffix)
        self.all_act_prob = tf.clip_by_value(self.all_act_prob, 1e-20, 1.0)

        with tf.name_scope('loss' + self.suffix):
            neg_log_prob = tf.reduce_sum(
                -tf.log(tf.clip_by_value(self.all_act_prob, 1e-30, 1.0)) *
                tf.one_hot(indices=self.tf_acts, depth=self.n_actions),
                axis=1)
            loss = tf.reduce_mean(neg_log_prob * self.tf_vt)
            loss += self.entropy_weight * tf.reduce_mean(
                tf.reduce_sum(
                    tf.log(tf.clip_by_value(self.all_act_prob, 1e-30, 1.0)) *
                    self.all_act_prob, axis=1))
            self.entro = self.entropy_weight * tf.reduce_mean(
                tf.reduce_sum(
                    tf.log(tf.clip_by_value(self.all_act_prob, 1e-30, 1.0)) *
                    self.all_act_prob, axis=1))
            self.loss = loss

        with tf.name_scope('train' + self.suffix):
            self.train_op = tf.train.AdamOptimizer(self.pg_lr).minimize(loss)

        # safety loss (* -1 ?)
        self.chosen_action_log_probs = tf.reduce_sum(
            tf.log(tf.clip_by_value(self.all_act_prob, 1e-30, 1.0)) *
            tf.one_hot(indices=self.tf_acts, depth=self.n_actions),
            axis=1)

        ##### PPO CHANGE #####
        self.ppo_old_chosen_action_log_probs = tf.placeholder(tf.float32, [None])
        ##### PPO CHANGE #####

        self.old_chosen_action_log_probs = tf.stop_gradient(
            tf.placeholder(tf.float32, [None]))
        # self.each_safety_loss = tf.exp(self.chosen_action_log_probs -
        #     self.old_chosen_action_log_probs) * self.tf_safe
        self.each_safety_loss = (
            tf.exp(self.chosen_action_log_probs) -
            tf.exp(self.old_chosen_action_log_probs)) * self.tf_safe
        self.average_safety_loss = tf.reduce_mean(
            self.each_safety_loss)  # optionally divided by self.n_episodes
        # self.average_safety_loss += self.entro

        # KL divergence
        self.old_all_act_prob = tf.stop_gradient(
            tf.placeholder(tf.float32, [None, self.n_actions]))

        def kl(x, y):
            EPS = 1e-10
            x = tf.where(tf.abs(x) < EPS, EPS * tf.ones_like(x), x)
            y = tf.where(tf.abs(y) < EPS, EPS * tf.ones_like(y), y)
            X = tf.distributions.Categorical(probs=x + EPS)
            Y = tf.distributions.Categorical(probs=y + EPS)
            return tf.distributions.kl_divergence(X, Y, allow_nan_stats=False)

        self.each_kl_divergence = kl(self.all_act_prob, self.old_all_act_prob)
        self.average_kl_divergence = tf.reduce_mean(self.each_kl_divergence)
        # self.kl_gradients = tf.gradients(self.average_kl_divergence,
        #     self.trainable_variables)  # unused

        self.desired_kl = desired_kl
        # self.metrics = [self.loss, self.average_kl_divergence,
        #                 self.average_safety_loss, self.entro]  # Luping
        self.metrics = [
            self.loss, self.loss, self.average_safety_loss, self.entro
        ]  # Luping

        # Flat
        self.flat_params_op = get_flat_params(self.trainable_variables)

        # Rather than TensorFlow's default machinery, the gradients are computed
        # explicitly here: (1) loss: g, (2) kl: directional gradients (Fisher),
        # (3) safety: b.

        ##### PPO change #####
        #### PPO Suyi's Change ####
        with tf.name_scope('ppoloss' + self.suffix):
            self.ppo_ratio = tf.exp(self.chosen_action_log_probs -
                                    self.ppo_old_chosen_action_log_probs)
            # self.ppo_ratio = tf.Print(self.ppo_ratio, [self.ppo_ratio],
            #                           "self.ppo_ratio: ")
            surr = self.ppo_ratio * self.tf_vt
            self.ppoloss = -tf.reduce_mean(
                tf.minimum(
                    surr,
                    tf.clip_by_value(self.ppo_ratio, 1. - self.clip_eps,
                                     1. + self.clip_eps) * self.tf_vt))
            self.ppoloss += self.entropy_weight * tf.reduce_mean(
                tf.reduce_sum(
                    tf.log(tf.clip_by_value(self.all_act_prob, 1e-30, 1.0)) *
                    self.all_act_prob, axis=1))
            # self.ppoloss += 0.01 * tf.reduce_mean(tf.reduce_sum(
            #     tf.log(tf.clip_by_value(self.all_act_prob, 1e-30, 1.0)) *
            #     self.all_act_prob, axis=1))

        with tf.variable_scope('ppotrain'):
            self.atrain_op = tf.train.AdamOptimizer(self.lr).minimize(self.ppoloss)
        #### PPO Suyi's Change ####

        self.ppoloss_flat_gradients_op = get_flat_gradients(
            self.ppoloss, self.trainable_variables)
        ##### PPO change #####

        self.loss_flat_gradients_op = get_flat_gradients(
            self.loss, self.trainable_variables)
        self.kl_flat_gradients_op = get_flat_gradients(
            self.average_kl_divergence, self.trainable_variables)
        self.constraint_flat_gradients_op = get_flat_gradients(
            self.average_safety_loss, self.trainable_variables)

        self.vec = tf.placeholder(tf.float32, [None])
        self.fisher_product_op = self.get_fisher_product_op()

        self.new_params = tf.placeholder(tf.float32, [None])
        self.params_assign_op = assign_network_params_op(
            self.new_params, self.trainable_variables,
            self.trainable_variables_shapes)
# @time : 2021/03/17 20:10:29
"""Tests for the Hierarchical Probabilistic U-Net open-source version."""

from model import HierarchicalProbUNet
import tensorflow.compat.v1 as tf

_NUM_CLASSES = 2
_BATCH_SIZE = 2
_SPATIAL_SHAPE = [32, 32]
_CHANNELS_PER_BLOCK = [5, 7, 9, 11, 13]
_IMAGE_SHAPE = [_BATCH_SIZE] + _SPATIAL_SHAPE + [1]
_BOTTLENECK_SIZE = _SPATIAL_SHAPE[0] // 2 ** (len(_CHANNELS_PER_BLOCK) - 1)
_SEGMENTATION_SHAPE = [_BATCH_SIZE] + _SPATIAL_SHAPE + [_NUM_CLASSES]
_LATENT_DIMS = [3, 2, 1]
_INITIALIZERS = {
    'w': tf.orthogonal_initializer(gain=1.0, seed=None),
    'b': tf.truncated_normal_initializer(stddev=0.001)
}


def _get_placeholders():
    """Returns placeholders for the image and segmentation."""
    img = tf.placeholder(dtype=tf.float32, shape=_IMAGE_SHAPE)
    seg = tf.placeholder(dtype=tf.float32, shape=_SEGMENTATION_SHAPE)
    return img, seg


class HierarchicalProbUNetTest(tf.test.TestCase):

    def test_shape_of_sample(self):
        hpu_net = HierarchicalProbUNet(latent_dims=_LATENT_DIMS,
                                       channels_per_block=_CHANNELS_PER_BLOCK,