def discretize_range(discretize_fn, levels, low, high, thermometer=False):
    """Return a mask over the discretization levels touched by [low, high].

    Both bounds are clipped to [0, 1]. `levels` evenly spaced points between
    `high` and `low` are then discretized with `discretize_fn`, and every
    level hit by at least one of those points is set to 1 in the result.

    For example, if `discretize_fn` uniformly discretizes values between 0
    and 1 into 10 bins, `discretize_range(discretize_fn, 10, .3, .7)` would
    return [0., 0., 0., 1., 1., 1., 1., 0., 0., 0.]. The output is always in
    one-hot layout, independent of the encoding `discretize_fn` uses.

    Args:
        discretize_fn: Callable invoked as
            `discretize_fn(values, levels, thermometer)`.
        levels: Number of levels to discretize the input into; also the
            number of points sampled inside the interval.
        low: Minimum value in the interval.
        high: Maximum value in the interval.
        thermometer: If True, `discretize_fn` returns thermometer codes,
            which are converted to one-hot codes before accumulation.
            (Default: False).

    Returns:
        Float tensor that is 1 wherever any sampled point fell, 0 elsewhere.
    """
    lower = tf.clip_by_value(low, 0., 1.)
    upper = tf.clip_by_value(high, 0., 1.)

    hits = 0.
    for weight in np.linspace(0., 1., levels):
        # weight == 0 samples `upper`, weight == 1 samples `lower`.
        point = weight * lower + (1. - weight) * upper
        code = discretize_fn(point, levels, thermometer)
        if thermometer:
            # Accumulate in one-hot space so the mask marks single levels.
            code = discretization_utils.thermometer_to_one_hot(
                code, levels, flattened=True)
        hits += code

    return tf.to_float(tf.greater(hits, 0.))
def clip_weights_with_threshold(max_threshold):
    """Clip selected entries of the module-level `weights` dict in place.

    Every entry whose key contains 'conv' or 'fulcon' is replaced by a
    `tf.clip_by_value` op bounding it to [-max_threshold, max_threshold].
    Entries with other keys are left untouched.

    Args:
        max_threshold: Magnitude bound applied symmetrically around zero.
    """
    global weights
    # Only existing keys are reassigned, so the dict size never changes
    # while it is being iterated.
    for op, tensor in weights.items():
        if 'conv' in op or 'fulcon' in op:
            weights[op] = tf.clip_by_value(tensor, -max_threshold, max_threshold)
def _create_loss_and_optimizer(self, inputs, x_reconstr_mean, z_log_sigma_sq, z_mean):
    """Build the VAE loss and the Adam training op.

    The loss has two terms, both summed over axis 1 and stored on `self`:

    * `self.reconstr_loss`: negative Bernoulli log-likelihood of `inputs`
      under `x_reconstr_mean`.
    * `self.latent_loss`: KL-divergence-style regularizer computed from
      `z_mean` and `z_log_sigma_sq`.

    Args:
        inputs: Target values for the reconstruction.
        x_reconstr_mean: Reconstructed Bernoulli means from the decoder.
        z_log_sigma_sq: Log-variance of the latent code.
        z_mean: Mean of the latent code.

    Returns:
        Tuple `(loss, optimizer)`: the batch-averaged total loss and the op
        returned by `AdamOptimizer(...).minimize(loss)`.
    """
    # Probabilities are clipped to [1e-9, 1] so log(0) is never evaluated.
    log_p_on = tf.log(tf.clip_by_value(x_reconstr_mean, 1e-9, 1.0))
    log_p_off = tf.log(tf.clip_by_value(1 - x_reconstr_mean, 1e-9, 1.0))
    log_likelihood = inputs * log_p_on + (1 - inputs) * log_p_off
    self.reconstr_loss = -tf.reduce_sum(log_likelihood, 1)

    # Regularizer pulling the encoder's latent distribution toward the prior.
    kl_terms = 1 + z_log_sigma_sq - tf.square(z_mean) - tf.exp(z_log_sigma_sq)
    self.latent_loss = -0.5 * tf.reduce_sum(kl_terms, 1)

    # Average the per-example total over the batch.
    loss = tf.reduce_mean(self.reconstr_loss + self.latent_loss)
    adam = tf.train.AdamOptimizer(learning_rate=self.learning_rate)
    optimizer = adam.minimize(loss)
    return loss, optimizer
def tf_bivariate_normal(y, mu, sigma, rho, n_mixtures, batch_size):
    """Evaluate a clipped bivariate normal density for each mixture component.

    `y` gets a new axis 1 and is tiled `n_mixtures` times along it before
    `mu` is subtracted. Intermediate tensors are passed through
    `tf.verify_tensor_all_finite`, and the module-level `epsilon` is added to
    denominators to avoid division by zero.

    Args:
        y: Target points.
        mu: Component means, compatible with the tiled `y`.
        sigma: Component standard deviations (reduced over axis 2).
        rho: Component correlations.
        n_mixtures: Number of mixture components `y` is tiled to.
        batch_size: Unused; kept for interface compatibility.

    Returns:
        Tuple `(result, delta)`: the density clipped to [epsilon, 1.0] and
        the difference between the tiled `y` and `mu`.
    """
    check = tf.verify_tensor_all_finite

    mu = check(mu, "Mu not finite!")
    y = check(y, "Y not finite!")
    y_tiled = tf.tile(tf.expand_dims(y, 1), [1, n_mixtures, 1])
    delta = check(tf.sub(y_tiled, mu), "Delta not finite!")
    sigma = check(sigma, "Sigma not finite!")
    s = check(tf.reduce_prod(sigma, 2), "S not finite!")

    # -1 <= rho <= 1
    squared_term = tf.reduce_sum(
        tf.square(tf.div(delta, sigma + epsilon) + epsilon), 2)
    cross_term = 2 * tf.div(tf.mul(rho, tf.reduce_prod(delta, 2)), s + epsilon)
    z = check(squared_term - cross_term, "Z not finite!")

    # 0 < negRho <= 1
    rho = check(rho, "rho in bivariate normal not finite!")
    negRho = tf.clip_by_value(1 - tf.square(rho), epsilon, 1.0)
    negRho = check(negRho, "negRho not finite!")
    # Note that if negRho goes near zero, or z goes really large, this explodes.
    negRho = check(negRho, "negRho in bivariate normal not finite!")

    exponent = tf.exp(tf.div(-z, 2 * negRho))
    result = check(tf.clip_by_value(exponent, 1.0e-8, 1.0e8),
                   "Result in bivariate normal not finite!")

    denom = check(2 * np.pi * tf.mul(s, tf.sqrt(negRho)),
                  "Denom in bivariate normal not finite!")

    density = tf.clip_by_value(tf.div(result, denom + epsilon), epsilon, 1.0)
    result = check(density, "Result2 in bivariate normal not finite!")
    return result, delta
def bayes_crossentropy(y_true, y_pred, nb_classes=None, reduction=tf.reduce_mean, name=None):
    """Cross-entropy in which each class is re-weighted by its batch frequency.

    Args:
        y_true: Either one-hot targets (rank 2) or class indices (rank 1).
            Rank-1 targets are one-hot encoded with `nb_classes`.
        y_pred: Predicted probabilities. A rank-1 tensor, or one whose last
            dimension is 1, is treated as the probability of the positive
            class and expanded to two columns `[1 - p, p]`.
        nb_classes: Number of classes. Required when `y_true` is rank 1;
            otherwise inferred from the static second dimension of `y_true`.
        reduction: Callable applied to the per-example loss.
        name: Optional name for the enclosing name scope.

    Returns:
        `reduction` applied to the per-example loss.

    Raises:
        ValueError: If `y_true` is rank 1 and `nb_classes` is not given.
    """
    with tf.name_scope(name, "bayes_crossentropy", [y_true, y_pred]):
        y_pred_shape = y_pred.shape
        if y_pred_shape.ndims == 1 or y_pred_shape[-1].value == 1:
            if y_pred_shape.ndims == 1:
                y_pred = tf.expand_dims(y_pred, -1)
            y_pred0 = 1. - y_pred
            y_pred = tf.concat([y_pred0, y_pred], axis=-1)

        # get number of classes
        if y_true.shape.ndims == 1:
            if nb_classes is None:
                raise ValueError('y_pred and y_true must be one_hot encoded, '
                                 'otherwise you have to provide nb_classes.')
            y_true = tf.one_hot(y_true, depth=nb_classes)
        elif nb_classes is None:
            nb_classes = y_true.shape[1].value

        # avoid numerical instability with EPS clipping
        y_pred = tf.clip_by_value(y_pred, EPS, 1.0 - EPS)

        # ====== check distribution ====== #
        distribution = tf.reduce_sum(y_true, axis=0)
        # probability distribution of each class within this batch
        prob_distribution = dimshuffle(distribution / tf.reduce_sum(distribution),
                                       ('x', 0))
        # the prior is clipped too, so classes absent from the batch do not
        # cause a division by zero below
        prob_distribution = tf.clip_by_value(prob_distribution, EPS, 1.0 - EPS)

        # ====== init confusion info loss ====== #
        # weighted by y_true
        loss = y_true * tf.log(y_pred)
        # The float literal forces true division: with an integer nb_classes,
        # `-1 / nb_classes` floors to -1 under Python 2 and silently drops the
        # 1 / nb_classes normalization.
        loss = -1.0 / nb_classes * tf.reduce_sum(loss / prob_distribution, axis=1)
        return reduction(loss)
def translate(U, theta, out_height, out_width):
    """Paste each image of `U` into a larger zero canvas at an integer offset.

    Args:
        U: 4-D tensor whose last three dimensions (height, width, channels)
            are statically known.
        theta: Tensor split along axis 1 into two columns, the row offset
            and the column offset of each image.
        out_height: Height of the output canvas.
        out_width: Width of the output canvas.

    Returns:
        Tensor with static shape `[None, out_height, out_width, num_ch]`
        holding the values of `U` at their shifted positions.
    """
    num_batch = tf.shape(U)[0]
    height, width, num_ch = [dim.value for dim in U.get_shape()[1:]]
    hwc = height * width * num_ch

    def tile_per_batch(values):
        """Repeat per-pixel indices per channel, then tile over the batch."""
        column = tf.expand_dims(repeat(values, num_ch), 1)
        return tf.tile(column, tf.pack([num_batch, 1]))

    def per_element_offset(delta, upper):
        """Clip an offset to [0, upper], cast to int32, expand per element."""
        clipped = tf.cast(tf.clip_by_value(delta, 0, upper), 'int32')
        return tf.reshape(tf.tile(clipped, tf.pack([1, hwc])), [-1, 1])

    # One index column per output dimension, one row per element of U.
    batch_idx = tf.range(num_batch)
    row_idx = repeat(tf.range(height), width)
    col_idx = tf.tile(tf.range(width), tf.pack([height]))
    chan_idx = tf.range(num_ch)

    batch_idx = tf.expand_dims(repeat(batch_idx, hwc), 1)
    row_idx = tile_per_batch(row_idx)
    col_idx = tile_per_batch(col_idx)
    chan_idx = tf.tile(tf.expand_dims(chan_idx, 1),
                       tf.pack([num_batch * height * width, 1]))

    # Offsets are clipped so the shifted image stays inside the canvas.
    dx, dy = tf.split(1, 2, theta)
    dx = per_element_offset(dx, out_height - height)
    dy = per_element_offset(dy, out_width - width)

    target_idx = tf.concat(1, [batch_idx, row_idx + dx, col_idx + dy, chan_idx])
    flat_values = tf.reshape(U, [-1])
    T = tf.sparse_to_dense(
        target_idx, tf.pack([num_batch, out_height, out_width, num_ch]), flat_values)
    T.set_shape([None, out_height, out_width, num_ch])
    return T
def prob_is_largest(self, Y, mu, var, gh_x, gh_w):
    """Estimate the probability that the latent function of class `Y` is largest.

    The integral over the selected latent function is approximated on a
    Gauss-Hermite grid.

    Args:
        Y: Class labels; flattened before one-hot encoding.
        mu: Latent means, rank 2 with one column per class.
        var: Latent variances, same layout as `mu`.
        gh_x: Gauss-Hermite abscissas.
        gh_w: Gauss-Hermite weights.

    Returns:
        Column tensor: for each row, the weighted sum over the grid of the
        product of the other classes' Gaussian CDFs.
    """
    # Mean and variance of the latent function indicated by the label.
    oh_on = tf.cast(
        tf.one_hot(tf.reshape(Y, (-1,)), self.num_classes, 1.0, 0.0), float_type)
    mu_selected = tf.reduce_sum(oh_on * mu, 1)
    var_selected = tf.reduce_sum(oh_on * var, 1)

    # Gauss-Hermite grid centred on the selected mean; variances are floored
    # at 1e-10 before the square root.
    selected_scale = tf.sqrt(tf.clip_by_value(2.0 * var_selected, 1e-10, np.inf))
    X = tf.reshape(mu_selected, (-1, 1)) + gh_x * tf.reshape(selected_scale, (-1, 1))

    # Standardized distance of every latent function (the selected one
    # included) to every grid point, then the Gaussian CDF.
    latent_std = tf.sqrt(tf.clip_by_value(var, 1e-10, np.inf))
    dist = (tf.expand_dims(X, 1) - tf.expand_dims(mu, 2)) / tf.expand_dims(latent_std, 2)
    cdfs = 0.5 * (1.0 + tf.erf(dist / np.sqrt(2.0)))

    # Squash the CDFs into [1e-4, 1 - 1e-4].
    cdfs = cdfs * (1 - 2e-4) + 1e-4

    # Replace the selected function's CDF by 1 so it drops out of the product.
    oh_off = tf.cast(
        tf.one_hot(tf.reshape(Y, (-1,)), self.num_classes, 0.0, 1.0), float_type)
    cdfs = cdfs * tf.expand_dims(oh_off, 2) + tf.expand_dims(oh_on, 2)

    # Product over the latent functions, weighted sum over the grid.
    grid_weights = tf.reshape(gh_w / np.sqrt(np.pi), (-1, 1))
    return tf.matmul(tf.reduce_prod(cdfs, reduction_indices=[1]), grid_weights)
def _loss_x_entropy(self, x, z, noise=None):
    """Mean cross-entropy between targets `x` and reconstruction `z`.

    `x`, `1 - x`, `z` and `1 - z` are all clipped to
    [FLAGS.zero_bound, FLAGS.one_bound] before use.

    Args:
        x: Target values.
        z: Reconstructed values.
        noise: Optional boolean mask selecting corrupted entries. When given,
            corrupted and uncorrupted entries are weighted by the two values
            read from `self._get_emph_params`.

    Returns:
        Scalar tensor: the batch mean of the per-example loss (summed over
        axis 1).
    """
    with tf.name_scope("xentropy_loss"):
        z_clipped = tf.clip_by_value(z, FLAGS.zero_bound, FLAGS.one_bound)
        z_minus_1_clipped = tf.clip_by_value((1.0 - z), FLAGS.zero_bound, FLAGS.one_bound)
        x_clipped = tf.clip_by_value(x, FLAGS.zero_bound, FLAGS.one_bound)
        x_minus_1_clipped = tf.clip_by_value((1.0 - x), FLAGS.zero_bound, FLAGS.one_bound)

        # cross_entropy = x * log(z) + (1 - x) * log(1 - z)
        cross_entropy = tf.add(tf.mul(tf.log(z_clipped), x_clipped),
                               tf.mul(tf.log(z_minus_1_clipped), x_minus_1_clipped),
                               name='X-Entr')

        # Compare against None explicitly: evaluating a tensor (or a
        # multi-element array) as a Python bool raises instead of testing
        # whether a mask was supplied.
        if noise is not None:
            with tf.name_scope("Given_Emphasis"):
                # NOTE(review): read without a call, so presumably a property
                # returning the (corrupted, uncorrupted) weights -- confirm.
                a, b = self._get_emph_params
                zeros = tf.zeros_like(cross_entropy)
                corrupted = tf.select(noise, cross_entropy, zeros,
                                      name='Corrupted_Emphasis')
                uncorrupted = tf.select(noise, tf.zeros_like(cross_entropy), cross_entropy,
                                        name='Uncorrupted_Emphasis')
                loss = (a * (-1 * tf.reduce_sum(corrupted, 1))
                        + b * (-1 * tf.reduce_sum(uncorrupted, 1)))
        else:
            # Sum the cost for each example
            loss = -1 * tf.reduce_sum(cross_entropy, 1)

        # Reduce mean to find the overall cost of the loss
        cross_entropy_mean = tf.reduce_mean(loss, name='xentropy_mean')
    return cross_entropy_mean
def build_decoder(self, input_var):
    """Build the decoder network and its mean / log-variance output heads.

    When `self.p_layers` is non-empty, a `Sequential` stack of
    `FullyConnected` layers named 'fc_1', 'fc_2', ... is built and applied to
    `input_var`; otherwise `input_var` feeds the output heads directly.

    Sets `self.decoder`, `self._dec_mean`, `self.dec_mean`,
    `self._dec_log_std_sq` and `self.dec_log_std_sq` (clipped to
    [-self.sigma_clip, self.sigma_clip]), plus `self._decoder` when hidden
    layers exist.

    Args:
        input_var: Latent input tensor fed to the decoder.
    """
    if len(self.p_layers) > 0:
        self._decoder = Sequential('vae_decoder')
        self._decoder += FullyConnected(self.latent_dims, self.p_layers[0],
                                        coder_act_fn, name='fc_1')
        # `range` instead of the Python-2-only `xrange`; the layer list is
        # short, so building a list on Python 2 costs nothing.
        for i in range(1, len(self.p_layers)):
            self._decoder += FullyConnected(self.p_layers[i - 1], self.p_layers[i],
                                            coder_act_fn, name='fc_%d' % (i + 1))
        self.decoder = self._decoder(input_var)
        head_in_dims = self.p_layers[-1]
    else:
        self.decoder = input_var
        head_in_dims = self.latent_dims

    # The output heads differ between the two cases only in their input width.
    self._dec_mean = FullyConnected(head_in_dims, self.input_dims,
                                    dec_mean_act_fn, name='dec_mean')
    self.dec_mean = self._dec_mean(self.decoder)
    self._dec_log_std_sq = FullyConnected(head_in_dims, self.input_dims,
                                          mean_std_act_fn, name='dec_std')
    self.dec_log_std_sq = tf.clip_by_value(
        self._dec_log_std_sq(self.decoder),
        -self.sigma_clip,
        self.sigma_clip
    )
def focal_loss(prediction_tensor, target_tensor, weights=None, alpha=0.25, gamma=2):
    r"""Compute the multi-label focal loss of the predictions.

    With p = sigmoid(x) and z = target_tensor:

        FL = -alpha * (z - p)^gamma * log(p) - (1 - alpha) * p^gamma * log(1 - p)

    where the first term is active only where z >= p and the second only
    where z is not positive.

    Args:
        prediction_tensor: A float tensor of shape [batch_size, num_anchors,
            num_classes] representing the predicted logits for each class.
        target_tensor: A float tensor of shape [batch_size, num_anchors,
            num_classes] representing one-hot encoded classification targets.
        weights: Accepted for interface compatibility; not used.
        alpha: Focal loss alpha hyper-parameter.
        gamma: Focal loss gamma hyper-parameter.

    Returns:
        loss: A scalar tensor, the mean of the per-entry loss.
    """
    sigmoid_p = tf.nn.sigmoid(prediction_tensor)
    zeros = array_ops.zeros_like(sigmoid_p, dtype=sigmoid_p.dtype)

    # Positive entries: gap between target and prediction, zero where the
    # prediction exceeds the target.
    pos_p_sub = array_ops.where(target_tensor >= sigmoid_p,
                                target_tensor - sigmoid_p, zeros)
    # Negative entries: the predicted probability, zero where the target is
    # positive.
    neg_p_sub = array_ops.where(target_tensor > zeros, zeros, sigmoid_p)

    # Probabilities are floored at 1e-8 before the logarithm.
    log_p = tf.log(tf.clip_by_value(sigmoid_p, 1e-8, 1.0))
    log_not_p = tf.log(tf.clip_by_value(1.0 - sigmoid_p, 1e-8, 1.0))

    pos_term = alpha * (pos_p_sub ** gamma) * log_p
    neg_term = (1 - alpha) * (neg_p_sub ** gamma) * log_not_p
    per_entry_cross_ent = -pos_term - neg_term
    return tf.reduce_mean(per_entry_cross_ent)
def _create_cost_function_node(self, model_output, ref_input, regterm=None):
    """Create the cost node selected by `self.loss_func` and store it.

    Supported values of `self.loss_func`:

    * 'cross_entropy': element-wise binary cross-entropy, inputs to the
      logarithm floored at 1e-10.
    * 'softmax_cross_entropy': the same formula applied to
      `softmax(model_output)`, without clipping.
    * 'mean_squared': square root of the mean squared error.

    For any other value `self.cost` is set to None. Otherwise `self.cost`
    holds the cost (plus `regterm` when given) and a scalar summary tagged
    with `self.loss_func` is created.

    :param model_output: model output node
    :param ref_input: reference input placeholder node
    :param regterm: optional regularization term added to the cost
    :return: None; the result is stored in `self.cost`
    """
    with tf.name_scope("cost"):
        if self.loss_func == 'cross_entropy':
            log_out = tf.log(tf.clip_by_value(model_output, 1e-10, float('inf')))
            log_not_out = tf.log(tf.clip_by_value(1 - model_output, 1e-10, float('inf')))
            cost = - tf.reduce_mean(ref_input * log_out + (1 - ref_input) * log_not_out)
        elif self.loss_func == 'softmax_cross_entropy':
            softmax = tf.nn.softmax(model_output)
            cost = - tf.reduce_mean(ref_input * tf.log(softmax)
                                    + (1 - ref_input) * tf.log(1 - softmax))
        elif self.loss_func == 'mean_squared':
            cost = tf.sqrt(tf.reduce_mean(tf.square(ref_input - model_output)))
        else:
            cost = None

    if cost is None:
        self.cost = None
        return

    if regterm is not None:
        self.cost = cost + regterm
    else:
        self.cost = cost
    _ = tf.scalar_summary(self.loss_func, self.cost)
def batchnorm(x, gamma, beta, r_mean, r_var):
    """Batch-normalize `x` with running statistics, updating them first.

    The batch moments over axis 0 are blended into `r_mean` and `r_var` with
    weights 0.9 (old) / 0.1 (new). The normalization runs under a control
    dependency on both updates and uses the clipped running values.

    Args:
        x: Input tensor; moments are taken over axis 0.
        gamma: Scale applied after normalization.
        beta: Offset applied after normalization.
        r_mean: Variable holding the running mean; updated by this op.
        r_var: Variable holding the running variance; updated by this op.

    Returns:
        The normalized tensor.
    """
    batch_mean, batch_var = tf.nn.moments(x, [0])
    running_updates = [
        tf.assign(r_mean, 0.9 * r_mean + 0.1 * batch_mean),
        tf.assign(r_var, 0.9 * r_var + 0.1 * batch_var),
    ]
    with tf.control_dependencies(running_updates):
        # NOTE(review): the mean is clipped to [1e-10, 100] just like the
        # variance, which discards negative running means -- confirm whether
        # this is intended before relying on it.
        clipped_mean = tf.clip_by_value(r_mean, 1e-10, 100)
        clipped_var = tf.clip_by_value(r_var, 1e-10, 100)
        return tf.nn.batch_normalization(
            x, clipped_mean, clipped_var,
            offset=beta, scale=gamma, variance_epsilon=1e-5)
def cross_entropy(u, label_u, alpha=0.5, normed=False):
    """Class-balanced pairwise cross-entropy between all rows of `u`.

    Two rows count as similar when the inner product of their label vectors,
    clipped to [0, 1], is 1. Similar pairs are up-weighted by the ratio of
    all pairs to similar pairs.

    Args:
        u: Rank-2 tensor of codes, one row per sample.
        label_u: Rank-2 label tensor, one row per sample.
        alpha: Scale applied to the pairwise inner products.
        normed: If True, inner products are divided by the product of the
            row norms; otherwise they are clipped to [-15, 15].

    Returns:
        Scalar tensor: mean of the weighted pairwise loss.
    """
    label_ip = tf.cast(tf.matmul(label_u, tf.transpose(label_u)), tf.float32)
    s = tf.clip_by_value(label_ip, 0.0, 1.0)

    # compute balance param
    # s_t \in {-1, 1}
    s_t = tf.multiply(tf.add(s, tf.constant(-0.5)), tf.constant(2.0))
    sum_1 = tf.reduce_sum(s)
    sum_all = tf.reduce_sum(tf.abs(s_t))
    dissimilar_weight = tf.abs(tf.add(s, tf.constant(-1.0)))
    similar_weight = tf.multiply(tf.div(sum_all, sum_1), s)
    balance_param = tf.add(dissimilar_weight, similar_weight)

    if normed:
        raw_ip = tf.matmul(u, tf.transpose(u))

        def row_sum_column(t):
            """Sum `t` over axis 1 and return the result as a column."""
            return tf.reshape(tf.reduce_sum(t, 1), [tf.shape(t)[0], 1])

        # Outer product of the squared row norms, square-rooted: the product
        # of the two row norms for every pair.
        norm_products = tf.sqrt(tf.matmul(row_sum_column(tf.square(u)),
                                          row_sum_column(tf.square(u)),
                                          transpose_b=True))
        ip = tf.div(raw_ip, norm_products)
    else:
        ip = tf.clip_by_value(tf.matmul(u, tf.transpose(u)), -1.5e1, 1.5e1)

    n_rows = tf.shape(u)[0]
    ones = tf.ones([n_rows, n_rows])
    pairwise_loss = tf.log(ones + tf.exp(alpha * ip)) - s * alpha * ip
    return tf.reduce_mean(tf.multiply(pairwise_loss, balance_param))
def test():
    """Restore the saved model and write diagnostic image grids to disk.

    Uses the first 100 rows of `test_x`. Each grid is a 10 x 10 tiling saved
    as a PNG under `FLAGS.save_dir`: the originals, both attention windows,
    both partial reconstructions, the full reconstruction, and samples
    generated from standard-normal latent codes. All figures are shown at
    the end.
    """
    def save_grid(title, batch, tile_shape, filename):
        """Render `batch` as a 10 x 10 tile image and save it to `filename`."""
        fig = plt.figure(title)
        plt.gray()
        plt.axis('off')
        plt.imshow(batchmat_to_tileimg(batch, tile_shape, (10, 10)))
        fig.savefig(FLAGS.save_dir + filename)

    saver.restore(sess, FLAGS.save_dir+'/model.ckpt')
    batch_x = test_x[0:100]
    save_grid('original', batch_x, (height, width), '/original.png')

    # Attention windows, clipped to the displayable range [0, 1].
    fa, sa = sess.run(
        [tf.clip_by_value(x_att0, 0, 1), tf.clip_by_value(x_att1, 0, 1)],
        {x: batch_x})
    save_grid('first att', fa, (N, N), '/first_attention.png')
    save_grid('second att', sa, (N, N), '/second_attention.png')

    # Partial reconstructions, clipped the same way.
    fr, sr = sess.run(
        [tf.clip_by_value(p0, 0, 1), tf.clip_by_value(p1, 0, 1)],
        {x: batch_x})
    save_grid('first recon', fr, (height, width), '/first_recon.png')
    save_grid('second recon', sr, (height, width), '/second_recon.png')

    p_recon = sess.run(p, {x: batch_x})
    save_grid('reconstructed', p_recon, (height, width), '/reconstructed.png')

    # Generation: feed standard-normal samples for every latent variable.
    p_gen = sess.run(p, {z0_c: np.random.normal(size=(100, n_lat_c)),
                         z0_t: np.random.normal(size=(100, n_lat_t)),
                         z1_c: np.random.normal(size=(100, n_lat_c)),
                         z1_t: np.random.normal(size=(100, n_lat_t))})
    save_grid('generated', p_gen, (height, width), '/generated.png')

    plt.show()
def build_model(self, reuse, dev, ntype):
    """Build the network, the losses, the training op and the saver.

    Creates the input and target placeholders, the policy / value network
    via `build_net`, the combined policy + value loss, an RMSProp training
    op with per-gradient norm clipping, the merged summary op and a saver,
    all stored on `self`.

    Args:
        reuse: If True, variables in the current variable scope are reused.
        dev: Device string passed to `tf.device`.
        ntype: Network type forwarded to `build_net`.
    """
    # Both context managers must be entered. The previous
    # `variable_scope(...) and device(...)` evaluated to the device context
    # alone, so the variable scope was never applied.
    # NOTE(review): variables are now created under `self.name`, so
    # checkpoints written with un-scoped names will not restore -- confirm
    # before loading old checkpoints.
    with tf.variable_scope(self.name), tf.device(dev):
        if reuse:
            tf.get_variable_scope().reuse_variables()
            assert tf.get_variable_scope().reuse

        # Set inputs of networks
        self.minimap = tf.placeholder(
            tf.float32, [None, U.minimap_channel(), self.msize, self.msize], name='minimap')
        self.screen = tf.placeholder(
            tf.float32, [None, U.screen_channel(), self.ssize, self.ssize], name='screen')
        self.info = tf.placeholder(tf.float32, [None, self.isize], name='info')

        # Build networks
        net = build_net(self.minimap, self.screen, self.info,
                        self.msize, self.ssize, len(actions.FUNCTIONS), ntype)
        self.spatial_action, self.non_spatial_action, self.value = net

        # Set targets and masks
        self.valid_spatial_action = tf.placeholder(
            tf.float32, [None], name='valid_spatial_action')
        self.spatial_action_selected = tf.placeholder(
            tf.float32, [None, self.ssize**2], name='spatial_action_selected')
        self.valid_non_spatial_action = tf.placeholder(
            tf.float32, [None, len(actions.FUNCTIONS)], name='valid_non_spatial_action')
        self.non_spatial_action_selected = tf.placeholder(
            tf.float32, [None, len(actions.FUNCTIONS)], name='non_spatial_action_selected')
        self.value_target = tf.placeholder(tf.float32, [None], name='value_target')

        # Compute log probability
        spatial_action_prob = tf.reduce_sum(
            self.spatial_action * self.spatial_action_selected, axis=1)
        spatial_action_log_prob = tf.log(tf.clip_by_value(spatial_action_prob, 1e-10, 1.))
        non_spatial_action_prob = tf.reduce_sum(
            self.non_spatial_action * self.non_spatial_action_selected, axis=1)
        # Renormalize over the currently valid actions only.
        valid_non_spatial_action_prob = tf.reduce_sum(
            self.non_spatial_action * self.valid_non_spatial_action, axis=1)
        valid_non_spatial_action_prob = tf.clip_by_value(
            valid_non_spatial_action_prob, 1e-10, 1.)
        non_spatial_action_prob = non_spatial_action_prob / valid_non_spatial_action_prob
        non_spatial_action_log_prob = tf.log(
            tf.clip_by_value(non_spatial_action_prob, 1e-10, 1.))
        self.summary.append(tf.summary.histogram('spatial_action_prob', spatial_action_prob))
        self.summary.append(
            tf.summary.histogram('non_spatial_action_prob', non_spatial_action_prob))

        # Compute losses, more details in https://arxiv.org/abs/1602.01783
        # Policy loss and value loss
        action_log_prob = (self.valid_spatial_action * spatial_action_log_prob
                           + non_spatial_action_log_prob)
        # The advantage is treated as a constant in both losses.
        advantage = tf.stop_gradient(self.value_target - self.value)
        policy_loss = - tf.reduce_mean(action_log_prob * advantage)
        value_loss = - tf.reduce_mean(self.value * advantage)
        self.summary.append(tf.summary.scalar('policy_loss', policy_loss))
        self.summary.append(tf.summary.scalar('value_loss', value_loss))

        # TODO: policy penalty
        loss = policy_loss + value_loss

        # Build the optimizer
        self.learning_rate = tf.placeholder(tf.float32, None, name='learning_rate')
        opt = tf.train.RMSPropOptimizer(self.learning_rate, decay=0.99, epsilon=1e-10)
        grads = opt.compute_gradients(loss)
        cliped_grad = []
        for grad, var in grads:
            self.summary.append(tf.summary.histogram(var.op.name, var))
            if grad is None:
                # Variable not connected to the loss: nothing to clip or apply.
                continue
            self.summary.append(tf.summary.histogram(var.op.name+'/grad', grad))
            grad = tf.clip_by_norm(grad, 10.0)
            cliped_grad.append([grad, var])
        self.train_op = opt.apply_gradients(cliped_grad)
        self.summary_op = tf.summary.merge(self.summary)

        self.saver = tf.train.Saver(max_to_keep=100)
def _forward(self, x, gpu):
    """Run the bottom-up and top-down passes and build the objectives.

    Args:
        x: Batch of input images with integer pixel values.
        gpu: Index of the tower being built; summaries are only emitted for
            the last tower (`hps.num_gpus - 1`) in "train" mode.

    Returns:
        Tuple `(x, obj, loss)`: the clipped decoder output, the summed
        `kl_obj - log_pxz` objective, and the summed result of
        `compute_lowerbound`.
    """
    hps = self.hps

    # Map pixel values to [-0.5, 0.5].
    x = tf.to_float(x)
    x = tf.clip_by_value((x + 0.5) / 256.0, 0.0, 1.0) - 0.5

    # Input images are repeated k times on the input.
    # This is used for Importance Sampling loss (k is number of samples).
    data_size = hps.batch_size * hps.k
    x = repeat(x, hps.k)

    orig_x = x
    h_size = hps.h_size

    with arg_scope([conv2d, deconv2d], init=(self.mode == "init")):
        # layers[i][j] is block j at depth i; the first block of every depth
        # except depth 0 is created with downsample=True.
        layers = []
        for i in range(hps.depth):
            layers.append([])
            for j in range(hps.num_blocks):
                downsample = (i > 0) and (j == 0)
                layers[-1].append(IAFLayer(hps, self.mode, downsample))

        # Bottom-up pass.
        h = conv2d("x_enc", x, h_size, [5, 5], [2, 2])  # -> [16, 16]
        for i, layer in enumerate(layers):
            for j, sub_layer in enumerate(layer):
                with tf.variable_scope("IAF_%d_%d" % (i, j)):
                    h = sub_layer.up(h)

        # top->down
        self.h_top = h_top = tf.get_variable("h_top", [h_size],
                                             initializer=tf.zeros_initializer)
        h_top = tf.reshape(h_top, [1, -1, 1, 1])
        # Floor division keeps the tile multiples integral; true division
        # yields a float under Python 3, which tf.tile rejects.
        top_size = hps.image_size // 2 ** len(layers)
        h = tf.tile(h_top, [data_size, 1, top_size, top_size])
        kl_cost = kl_obj = 0.0

        for i, layer in reversed(list(enumerate(layers))):
            for j, sub_layer in reversed(list(enumerate(layer))):
                with tf.variable_scope("IAF_%d_%d" % (i, j)):
                    h, cur_obj, cur_cost = sub_layer.down(h)
                    kl_obj += cur_obj
                    kl_cost += cur_cost

                    if self.mode == "train" and gpu == hps.num_gpus - 1:
                        tf.scalar_summary("model/kl_obj_%02d_%02d" % (i, j),
                                          tf.reduce_mean(cur_obj))
                        tf.scalar_summary("model/kl_cost_%02d_%02d" % (i, j),
                                          tf.reduce_mean(cur_cost))

        x = tf.nn.elu(h)
        x = deconv2d("x_dec", x, 3, [5, 5])
        # Keep the decoder output strictly inside the data range.
        x = tf.clip_by_value(x, -0.5 + 1 / 512., 0.5 - 1 / 512.)

    log_pxz = discretized_logistic(x, self.dec_log_stdv, sample=orig_x)
    obj = tf.reduce_sum(kl_obj - log_pxz)

    if self.mode == "train" and gpu == hps.num_gpus - 1:
        tf.scalar_summary("model/log_pxz", -tf.reduce_mean(log_pxz))
        tf.scalar_summary("model/kl_obj", tf.reduce_mean(kl_obj))
        tf.scalar_summary("model/kl_cost", tf.reduce_mean(kl_cost))
    loss = tf.reduce_sum(compute_lowerbound(log_pxz, kl_cost, hps.k))
    return x, obj, loss
def scale(self, x):
    """Scale x from -0.5 - 0.5 to 0 - 255.

    NaN and infinite entries are replaced by 1 before scaling. The value is
    clipped to [-0.5, 0.5], shifted by 0.5, multiplied by
    `2 ** self.hparams.n_bits_x`, clipped to [0, 255] and cast to uint8.
    """
    # Replace non-finite entries one predicate at a time (NaN, then inf).
    for is_invalid in (tf.is_nan, tf.is_inf):
        x = tf.where(is_invalid(x), tf.ones_like(x), x)

    unit_range = tf.clip_by_value(x, -0.5, 0.5) + 0.5
    pixel_values = unit_range * 2**self.hparams.n_bits_x
    return tf.cast(tf.clip_by_value(pixel_values, 0, 255), dtype=tf.uint8)
def _interpolate2d(imgs, x, y):
    """Bilinearly sample `imgs` at normalized coordinates `(x, y)`.

    Args:
        imgs: 4-D image tensor indexed as [batch, x, y, channel].
        x: Flat coordinates along axis 1, where -1 maps to index 0 and 1
            maps to the last index.
        y: Flat coordinates along axis 2, with the same convention.

    Returns:
        Float tensor reshaped to the shape of `imgs`, so one coordinate pair
        per input pixel is expected.
    """
    img_shape = tf.shape(imgs)
    n_batch, xlen, ylen, n_channel = (img_shape[0], img_shape[1],
                                      img_shape[2], img_shape[3])

    x = tf.to_float(x)
    y = tf.to_float(y)

    lowest = tf.zeros([], dtype='int32')
    max_x = tf.cast(xlen - 1, 'int32')
    max_y = tf.cast(ylen - 1, 'int32')

    # scale coordinates from [-1, 1] to [0, xlen - 1] / [0, ylen - 1]
    x = (x + 1.) * (tf.to_float(xlen) - 1.) * 0.5
    y = (y + 1.) * (tf.to_float(ylen) - 1.) * 0.5

    # The four neighbouring grid points, clipped to the image bounds.
    x_floor = tf.cast(tf.floor(x), 'int32')
    y_floor = tf.cast(tf.floor(y), 'int32')
    x0 = tf.clip_by_value(x_floor, lowest, max_x)
    x1 = tf.clip_by_value(x_floor + 1, lowest, max_x)
    y0 = tf.clip_by_value(y_floor, lowest, max_y)
    y1 = tf.clip_by_value(y_floor + 1, lowest, max_y)

    # Flat offsets into the [batch * xlen * ylen, channel] view of `imgs`.
    base = _repeat(tf.range(n_batch) * xlen * ylen, ylen * xlen)
    row0 = base + x0 * ylen
    row1 = base + x1 * ylen
    corner_indices = [row0 + y0, row0 + y1, row1 + y0, row1 + y1]

    # use indices to lookup pixels in the flat image and restore
    # n_channel dim
    imgs_flat = tf.to_float(tf.reshape(imgs, [-1, n_channel]))
    corners = [tf.gather(imgs_flat, index) for index in corner_indices]

    # Interpolation weights from the distance to the (clipped) lower corner.
    dx = x - tf.to_float(x0)
    dy = y - tf.to_float(y0)
    corner_weights = [
        tf.expand_dims((1. - dx) * (1. - dy), 1),
        tf.expand_dims((1. - dx) * dy, 1),
        tf.expand_dims(dx * (1. - dy), 1),
        tf.expand_dims(dx * dy, 1),
    ]
    output = tf.add_n([w * c for w, c in zip(corner_weights, corners)])

    return tf.reshape(output, [n_batch, xlen, ylen, n_channel])
def __init__(self, scope, globalAC=None):
    """Build either the global network or a worker network.

    When `scope` equals `GLOBAL_NET_SCOPE`, only the inference graph is
    built. Otherwise a worker graph is built with critic and actor losses,
    local gradients, and the ops that pull parameters from and push
    gradients to `globalAC`.

    Args:
        scope: Variable-scope name of this network.
        globalAC: The global network; read only when building a worker.
    """
    self.scope = scope

    if scope == GLOBAL_NET_SCOPE:
        # Global network: inference only.
        with tf.variable_scope(scope):
            self.s = tf.placeholder(tf.float32, [None, N_S], 'S')
            self._build_net()
            self.a_params = tl.layers.get_variables_with_name(scope + '/actor', True, False)
            self.c_params = tl.layers.get_variables_with_name(scope + '/critic', True, False)

            # Gaussian policy for the continuous action space.
            policy = tf.contrib.distributions.Normal(self.mu, self.sigma)

            with tf.name_scope('choose_a'):
                # Sample one action and clip it to the action bounds.
                sampled = tf.squeeze(policy.sample(1), axis=0)
                self.A = tf.clip_by_value(sampled, *A_BOUND)
    else:
        # Worker network: computes gradients locally, applies them globally.
        with tf.variable_scope(scope):
            self.s = tf.placeholder(tf.float32, [None, N_S], 'S')
            self.a_his = tf.placeholder(tf.float32, [None, N_A], 'A')
            self.v_target = tf.placeholder(tf.float32, [None, 1], 'Vtarget')

            self._build_net()

            td_error = tf.subtract(self.v_target, self.v, name='TD_error')
            with tf.name_scope('c_loss'):
                self.c_loss = tf.reduce_mean(tf.square(td_error))

            with tf.name_scope('wrap_a_out'):
                self.test = self.sigma[0]
                # Scale the mean by the upper action bound and keep sigma
                # strictly positive.
                self.mu, self.sigma = self.mu * A_BOUND[1], self.sigma + 1e-5

            # Gaussian policy for the continuous action space.
            policy = tf.contrib.distributions.Normal(self.mu, self.sigma)

            with tf.name_scope('a_loss'):
                action_log_prob = policy.log_prob(self.a_his)
                weighted_log_prob = action_log_prob * td_error
                entropy = policy.entropy()  # encourage exploration
                self.exp_v = ENTROPY_BETA * entropy + weighted_log_prob
                self.a_loss = tf.reduce_mean(-self.exp_v)

            with tf.name_scope('choose_a'):
                # Sample one action and clip it to the action bounds.
                sampled = tf.squeeze(policy.sample(1), axis=0)
                self.A = tf.clip_by_value(sampled, *A_BOUND)

            with tf.name_scope('local_grad'):
                self.a_params = tl.layers.get_variables_with_name(
                    scope + '/actor', True, False)
                self.c_params = tl.layers.get_variables_with_name(
                    scope + '/critic', True, False)
                self.a_grads = tf.gradients(self.a_loss, self.a_params)
                self.c_grads = tf.gradients(self.c_loss, self.c_params)

        with tf.name_scope('sync'):
            with tf.name_scope('pull'):
                # Copy the global parameters into the local ones.
                self.pull_a_params_op = [
                    local.assign(shared)
                    for local, shared in zip(self.a_params, globalAC.a_params)
                ]
                self.pull_c_params_op = [
                    local.assign(shared)
                    for local, shared in zip(self.c_params, globalAC.c_params)
                ]
            with tf.name_scope('push'):
                # Apply the local gradients to the global parameters.
                self.update_a_op = OPT_A.apply_gradients(
                    zip(self.a_grads, globalAC.a_params))
                self.update_c_op = OPT_C.apply_gradients(
                    zip(self.c_grads, globalAC.c_params))
def BatchRenorm(x, rmax, dmax, decay=0.9, epsilon=1e-5, use_scale=True, use_bias=True):
    """
    Batch Renormalization layer, as described in the paper:
    `Batch Renormalization: Towards Reducing Minibatch Dependence in
    Batch-Normalized Models <https://arxiv.org/abs/1702.03275>`_.

    Args:
        x (tf.Tensor): a NHWC or NC tensor.
        rmax, dmax (tf.Tensor): a scalar tensor, the maximum allowed corrections.
        decay (float): decay rate of moving average.
        epsilon (float): epsilon to avoid divide-by-zero.
        use_scale, use_bias (bool): whether to use the extra affine
            transformation or not.

    Returns:
        tf.Tensor: a tensor named ``output`` with the same shape of x.

    Variable Names:

    * ``beta``: the bias term.
    * ``gamma``: the scale term. Input will be transformed by ``x * gamma + beta``.
    * ``mean/EMA``: the moving average of mean.
    * ``variance/EMA``: the moving average of variance.
    """
    in_shape = x.get_shape().as_list()
    rank = len(in_shape)
    assert rank in [2, 4]

    n_out = in_shape[-1]
    if rank == 2:
        # Treat NC input as N x 1 x 1 x C so the 4-D kernels can be used.
        x = tf.reshape(x, [-1, 1, 1, n_out])
    beta, gamma, moving_mean, moving_var = get_bn_variables(
        n_out, use_scale, use_bias, tf.constant_initializer(1.0))

    ctx = get_current_tower_context()
    # for BatchRenorm, use_local_stat should always be is_training, unless a
    # different usage comes out in the future.
    use_local_stat = ctx.is_training

    if not use_local_stat:
        xn = tf.nn.batch_normalization(
            x, moving_mean, moving_var, beta, gamma, epsilon)
    else:
        xn, batch_mean, batch_var = tf.nn.fused_batch_norm(
            x, gamma, beta, epsilon=epsilon, is_training=True)

        inv_sigma = tf.rsqrt(moving_var, 'inv_sigma')
        # Corrections r and d are bounded by rmax / dmax and receive no
        # gradient.
        r = tf.stop_gradient(tf.clip_by_value(
            tf.sqrt(batch_var) * inv_sigma, 1.0 / rmax, rmax))
        d = tf.stop_gradient(tf.clip_by_value(
            (batch_mean - moving_mean) * inv_sigma, -dmax, dmax))
        xn = xn * r + d

    if rank == 2:
        xn = tf.squeeze(xn, [1, 2])

    if ctx.is_main_training_tower:
        return update_bn_ema(xn, batch_mean, batch_var, moving_mean, moving_var, decay)
    return tf.identity(xn, name='output')
def _get_coordinatewise_learning_rate(self, grad, var):
    """Compute the learning rate for `var` from moving gradient statistics.

    Updates the 'first_moment' and 'second_moment' slots of `var` from
    `grad`, builds a diagonal preconditioner from the second-moment slot
    (clipped to [1e-12, 1e12] and scaled by the batch size), and converts it
    into a learning rate.

    Args:
        grad: Gradient for `var`, a `tf.Tensor` or `tf.IndexedSlices`.
        var: Variable whose slots are read and updated.

    Returns:
        Learning rate tensor; a scalar when `self._use_single_learning_rate`
        is set, otherwise one value per preconditioner entry.

    Raises:
        tf.errors.InvalidArgumentError: If `grad` is neither a `tf.Tensor`
            nor a `tf.IndexedSlices`.
    """
    # Compute the learning rate using a moving average for the diagonal of BB^T
    first_slot = self.get_slot(var, 'first_moment')
    second_slot = self.get_slot(var, 'second_moment')
    decay = tf.cast(self._decay_tensor, var.dtype)
    batch_size = tf.cast(self._batch_size_tensor, var.dtype)

    # Create an estimator for the moving average of gradient mean and variance
    # via Welford's algorithm
    if isinstance(grad, tf.Tensor):
        delta = grad - first_slot
        first_step = tf.where(self._counter < 1, tf.cast(1, var.dtype), 1. - decay)
        first_moment_update = first_slot.assign_add(delta * first_step)

        # The second moment is updated only after the first moment.
        with tf.control_dependencies([first_moment_update]):
            second_moment_update = second_slot.assign_add(
                tf.cast(self._counter < 1, var.dtype) * -(1. - decay) * (
                    second_slot - decay * tf.square(delta)))
        diag_preconditioner = control_flow_ops.with_dependencies(
            [second_moment_update],
            tf.clip_by_value(second_slot, 1e-12, 1e12))
    elif isinstance(grad, tf.IndexedSlices):
        delta = grad.values - tf.gather_nd(first_slot, grad.indices)
        first_step = tf.where(self._counter < 1, tf.cast(1., var.dtype), 1. - decay)
        first_moment_update = tf.scatter_add(
            first_slot, grad.indices, delta * first_step)

        # The second moment is updated only after the first moment.
        with tf.control_dependencies([first_moment_update]):
            second_slot = tf.scatter_add(
                second_slot, grad.indices,
                tf.cast(self._counter < 1, var.dtype) * -(1. - decay) * (
                    tf.gather_nd(second_slot, grad.indices)
                    - decay * tf.square(delta)))
            second_slot = tf.gather_nd(second_slot, grad.indices)
            # TODO(b/70783772): Needs dtype specific clipping.
            diag_preconditioner = tf.clip_by_value(second_slot, 1e-12, 1e12)
    else:
        raise tf.errors.InvalidArgumentError(
            None, None, 'grad must of type Tensor or IndexedSlice')

    diag_preconditioner *= batch_size

    if self._use_single_learning_rate:
        diag_preconditioner = tf.reduce_mean(diag_preconditioner)

    # From Theorem 2 Corollary 1 of Mandt et al. 2017
    total_examples = tf.cast(self._total_num_examples, var.dtype.base_dtype)
    return 2. * batch_size / (total_examples * diag_preconditioner)
def __init__(self, env, env_name, _optimizer='adam'):
    """Build the discriminator graph, its loss, training op and reward op.

    The output of this discriminator is the reward for the learning agent,
    not the cost, because the discriminator predicts
    P(expert|s,a) = 1 - P(agent|s,a).

    :param env: Environment providing `observation_space` and `action_space`.
    :param env_name: Environment id. Kept for interface compatibility; the
        action dimensionality is read from `env.action_space` itself.
    :param _optimizer: One of 'adagrad', 'rmsprop', 'adadelta', 'cocob';
        any other value selects Adam.
    """
    self._optimizer = _optimizer

    # Read the action count from the space rather than from a whitelist of
    # environment names. The old name-based check misspelled "Acrobot" and
    # listed environments whose action space has no `n`, so those raised.
    action_space_count = getattr(env.action_space, 'n', None)
    if action_space_count is None:
        # No `n` attribute: use the first dimension of the space's shape.
        action_space_count = env.action_space.shape[0]

    with tf.variable_scope('discriminator'):
        self.scope = tf.get_variable_scope().name

        self.expert_s = tf.placeholder(
            dtype=tf.float32, shape=[None] + list(env.observation_space.shape))
        self.expert_a = tf.placeholder(dtype=tf.int32, shape=[None])
        expert_a_one_hot = tf.one_hot(self.expert_a, depth=action_space_count)
        # add noise to stabilise training
        expert_a_one_hot += tf.random_normal(
            tf.shape(expert_a_one_hot), mean=0.2, stddev=0.1, dtype=tf.float32)/1.2
        expert_s_a = tf.concat([self.expert_s, expert_a_one_hot], axis=1)

        self.agent_s = tf.placeholder(
            dtype=tf.float32, shape=[None] + list(env.observation_space.shape))
        self.agent_a = tf.placeholder(dtype=tf.int32, shape=[None])
        agent_a_one_hot = tf.one_hot(self.agent_a, depth=action_space_count)
        # add noise to stabilise training
        agent_a_one_hot += tf.random_normal(
            tf.shape(agent_a_one_hot), mean=0.2, stddev=0.1, dtype=tf.float32)/1.2
        agent_s_a = tf.concat([self.agent_s, agent_a_one_hot], axis=1)

        with tf.variable_scope('network') as network_scope:
            prob_1 = self.construct_network(input=expert_s_a)
            network_scope.reuse_variables()  # share parameters
            prob_2 = self.construct_network(input=agent_s_a)

        with tf.variable_scope('loss'):
            # Probabilities are floored at 0.01 before the logarithm.
            loss_expert = tf.reduce_mean(tf.log(tf.clip_by_value(prob_1, 0.01, 1)))
            loss_agent = tf.reduce_mean(tf.log(tf.clip_by_value(1 - prob_2, 0.01, 1)))
            loss = loss_expert + loss_agent
            # Negate so that minimizing the loss maximizes the log-likelihood.
            loss = -loss
            tf.summary.scalar('discriminator', loss)

        # optimizer: adagrad, rmsprop, adadelta, adam, cocob
        if self._optimizer == 'adagrad':
            optimizer = tf.train.AdagradOptimizer(learning_rate=0.01)
        elif self._optimizer == 'rmsprop':
            optimizer = tf.train.RMSPropOptimizer(learning_rate=0.00025)
        elif self._optimizer == 'adadelta':
            optimizer = tf.train.AdadeltaOptimizer(learning_rate=0.5)
        elif self._optimizer == 'cocob':
            optimizer = cocob.COCOB()
        else:  # adam
            optimizer = tf.train.AdamOptimizer()

        self.train_op = optimizer.minimize(loss)

        # log(P(expert|s,a)): larger is better for the agent
        self.rewards = tf.log(tf.clip_by_value(prob_2, 1e-10, 1))
def get_reconstruction_cost(self):
    """Return the cross-entropy between the input and its reconstruction.

    The input is propagated up and back down (``propup`` then ``propdown``);
    the per-example cross-entropy is summed over axis 1 and averaged.
    """
    hidden = self.propup(self.input)
    reconstruction = self.propdown(hidden)

    # Clip into [1e-30, 1] before the log so it never produces NaN.
    log_on = tf.log(tf.clip_by_value(
        reconstruction, clip_value_min=1e-30, clip_value_max=1.0))
    log_off = tf.log(tf.clip_by_value(
        1.0 - reconstruction, clip_value_min=1e-30, clip_value_max=1.0))

    per_example = tf.reduce_sum(
        self.input * log_on + (1.0 - self.input) * log_off, axis=1)
    return -tf.reduce_mean(per_example)
def conv_cross_entropy(hypo, actual_value):
    """Calculate the mean binary cross-entropy.

    Args:
        hypo -- TensorFlow tensor of the hypothesis (predicted probability)
        actual_value -- TensorFlow tensor of the expected value

    Returns:
        TensorFlow tensor holding the cross-entropy averaged over all elements
    """
    # Both probabilities are clipped to [1e-10, 1] so the logs stay finite.
    log_hit = tf.log(tf.clip_by_value(hypo, 1e-10, 1.0))
    log_miss = tf.log(tf.clip_by_value(1 - hypo, 1e-10, 1.0))
    elementwise = actual_value * log_hit + (1 - actual_value) * log_miss
    return -tf.reduce_mean(elementwise)
def __init__(self, config, is_training=True):
    """Build the two-sided sentence similarity graph.

    Both sentences go through ``singleRNN`` under the same scope ('side1',
    second call with reuse=True). Their masked, summed outputs are compared
    with an L1 distance mapped through exp(-d).

    Args:
        config: object providing ``num_step``, ``embed_dim`` and
            ``hidden_neural_size``.
        is_training: when False, construction stops after the cost tensors
            and no optimizer ops are created.
    """
    self.batch_size = tf.Variable(0, dtype=tf.int32, trainable=False)
    self.hidden_neural_size = config.hidden_neural_size

    sequence_shape = [None, config.num_step, config.embed_dim]
    mask_shape = [None, config.num_step]
    self.input_data_s1 = tf.placeholder(tf.float64, sequence_shape)
    self.input_data_s2 = tf.placeholder(tf.float64, sequence_shape)
    self.target = tf.placeholder(tf.float64, [None])
    self.mask_s1 = tf.placeholder(tf.float64, mask_shape)
    self.mask_s2 = tf.placeholder(tf.float64, mask_shape)

    self.new_batch_size = tf.placeholder(tf.int32, shape=[], name="new_batch_size")
    self._batch_size_update = tf.assign(self.batch_size, self.new_batch_size)

    with tf.name_scope('lstm_output_layer'):
        # Same scope for both sides; the second call reuses the variables.
        self.cell_outputs1 = self.singleRNN(
            x=self.input_data_s1, scope='side1', cell='lstm', reuse=None)
        self.cell_outputs2 = self.singleRNN(
            x=self.input_data_s2, scope='side1', cell='lstm', reuse=True)

    with tf.name_scope('Sentence_Layer'):
        # Sum the per-step outputs over the step axis, weighted by the mask.
        masked1 = self.cell_outputs1 * self.mask_s1[:, :, None]
        self.sent1 = tf.reduce_sum(masked1, axis=1)
        masked2 = self.cell_outputs2 * self.mask_s2[:, :, None]
        self.sent2 = tf.reduce_sum(masked2, axis=1)

    with tf.name_scope('loss'):
        l1_distance = tf.abs(tf.subtract(self.sent1, self.sent2), name='err_l1')
        l1_distance = tf.reduce_sum(l1_distance, axis=1)
        self.sim = tf.clip_by_value(tf.exp(-1.0 * l1_distance), 1e-7, 1.0 - 1e-7)
        # The target is rescaled with (target - 1) / 4 and clipped like sim.
        scaled_target = tf.clip_by_value(
            (self.target - 1.0) / 4.0, 1e-7, 1.0 - 1e-7)
        self.loss = tf.square(tf.subtract(self.sim, scaled_target))

    with tf.name_scope('cost'):
        self.cost = tf.reduce_mean(self.loss)
        # Error measured in the target's own scale (sim * 4 + 1).
        self.truecost = tf.reduce_mean(
            tf.square(tf.subtract(self.sim * 4.0 + 1.0, self.target)))

    if not is_training:
        return

    self.globle_step = tf.Variable(
        0, name="globle_step", trainable=False, dtype=tf.float64)
    self.lr = tf.Variable(0.0, trainable=False, dtype=tf.float64)

    trainable = tf.trainable_variables()
    gradients = tf.gradients(self.cost, trainable)
    optimizer = tf.train.AdadeltaOptimizer(learning_rate=self.lr, epsilon=1e-6)

    with tf.name_scope('train'):
        self.train_op = optimizer.apply_gradients(zip(gradients, trainable))

    # Learning rate is updated by feeding new_lr and running _lr_update.
    self.new_lr = tf.placeholder(tf.float64, shape=[], name="new_learning_rate")
    self._lr_update = tf.assign(self.lr, self.new_lr)
def mean_squared_logarithmic_error(y_true, y_pred):
    """Mean squared logarithmic error loss.

    Each input is clipped to a minimum of 1e-8, shifted by 1 and passed
    through a log; the loss is the mean squared difference of the results.

    Args:
      y_true: tf.Tensor.
      y_pred: tf.Tensor.
        Tensors of same shape and type.
    """
    def _log_plus_one(values):
        return tf.log(tf.clip_by_value(values, 1e-8, np.inf) + 1.0)

    log_pred = _log_plus_one(y_pred)
    log_true = _log_plus_one(y_true)
    return tf.reduce_mean(tf.square(log_pred - log_true))
def build(self, y_hat, y, mask=None):
    """Build the cross-entropy loss, store it on ``self.loss`` and return it.

    Args:
        y_hat: predicted probabilities; clipped to [1e-10, 1] before the log.
        y: target tensor multiplied with the log-probabilities.
        mask: optional tensor multiplied into the targets; skipped when None.
    """
    weights = y if mask is None else mask * y
    log_probs = tf.log(tf.clip_by_value(y_hat, 1e-10, 1.0))
    per_example = -tf.reduce_sum(weights * log_probs, reduction_indices=[1])
    self.loss = tf.reduce_mean(per_example)
    return self.loss
def critic_loss(self, states, actions, rewards, discounts, next_states):
    """Computes a loss for training the critic network.

    TD targets are built from the target value net on ``next_states`` and
    compared with the critic's Q values through ``self._td_errors_loss``.
    When ``self._residual_phi`` is positive, a residual loss built from
    ``self.value_net`` is blended in.

    Args:
      states: A [batch_size, num_state_dims] tensor representing a batch
        of states.
      actions: A [batch_size, num_action_dims] tensor representing a batch
        of actions.
      rewards: A [batch_size, ...] tensor representing a batch of rewards,
        broadcastable to the critic net output.
      discounts: A [batch_size, ...] tensor representing a batch of discounts,
        broadcastable to the critic net output.
      next_states: A [batch_size, num_state_dims] tensor representing a batch
        of next states.
    Returns:
      A rank-0 tensor representing the critic loss.
    Raises:
      ValueError: If any of the inputs do not have the expected dimensions, or
        if their batch_sizes do not match.
    """
    for batch in (states,):
        self._validate_states(batch)
    self._validate_actions(actions)
    self._validate_states(next_states)

    def bootstrap_targets(next_values):
        """Turn next-state values into (optionally clipped) TD targets."""
        targets = next_values * discounts + rewards
        if self._target_q_clipping is not None:
            low, high = self._target_q_clipping[0], self._target_q_clipping[1]
            targets = tf.clip_by_value(targets, low, high)
        return targets

    target_q_values = self.target_value_net(next_states, for_critic_loss=True)
    td_targets = bootstrap_targets(target_q_values)

    q_values = self.critic_net(states, actions, for_critic_loss=True)
    td_errors = td_targets - q_values
    if self._debug_summaries:
        gen_debug_td_error_summaries(
            target_q_values, q_values, td_targets, td_errors)

    loss = self._td_errors_loss(td_targets, q_values)

    if self._residual_phi <= 0.0:
        return loss

    # Residual gradient loss.
    residual_q_values = self.value_net(next_states, for_critic_loss=True)
    residual_td_targets = bootstrap_targets(residual_q_values)
    # NOTE(review): this difference is computed but never used, and the loss
    # below compares the residual targets with residual_q_values rather than
    # q_values. Kept as in the original; confirm that is intended.
    residual_td_errors = residual_td_targets - q_values
    residual_loss = self._td_errors_loss(
        residual_td_targets, residual_q_values)

    phi = self._residual_phi
    return loss * (1.0 - phi) + residual_loss * phi
def mean_squared_logarithmic_error(y_true, y_pred):
    """Mean of the squared difference between log(1 + y_pred) and log(1 + y_true).

    Both inputs are clipped to a minimum of 1e-8 before 1 is added.

    Parameters
    ----------
    y_true : tf.Tensor
    y_pred : tf.Tensor
        Tensors of same shape and type.
    """
    clipped_pred = tf.clip_by_value(y_pred, 1e-8, np.inf)
    log_pred = tf.log(clipped_pred + 1.0)
    clipped_true = tf.clip_by_value(y_true, 1e-8, np.inf)
    log_true = tf.log(clipped_true + 1.0)
    squared_gap = tf.square(log_pred - log_true)
    return tf.reduce_mean(squared_gap)
def __init__(self,
             p_values,
             low_action,
             high_action,
             stochastic,
             eps,
             theta=0.15,
             sigma=0.2,
             use_gaussian_noise=False,
             act_noise=0.1,
             is_target=False,
             target_noise=0.2,
             noise_clip=0.5,
             parameter_noise=False):
    """Build ``self.actions``: deterministic actions, optionally with noise.

    ``p_values`` is rescaled into [low_action, high_action]. The noisy variant
    adds either Gaussian noise (clipped by ``noise_clip`` when ``is_target``)
    or a running noise variable updated with ``theta`` and ``sigma`` and
    scaled by ``eps``. The result is clipped back into the action bounds.
    ``tf.cond`` picks the noisy actions only when ``stochastic`` is true and
    ``parameter_noise`` is False.
    """
    # shape is [None, dim_action]
    deterministic_actions = (
        (high_action - low_action) * p_values + low_action)

    if not use_gaussian_noise:
        # State variable for the temporally correlated exploration noise.
        exploration_sample = tf.get_variable(
            name="ornstein_uhlenbeck",
            dtype=tf.float32,
            initializer=low_action.size * [.0],
            trainable=False)
        normal_sample = tf.random_normal(
            shape=[low_action.size], mean=0.0, stddev=1.0)
        drift = theta * (.0 - exploration_sample)
        exploration_value = tf.assign_add(
            exploration_sample, drift + sigma * normal_sample)
        perturbed = (
            deterministic_actions +
            eps * (high_action - low_action) * exploration_value)
    else:
        if is_target:
            normal_sample = tf.random_normal(
                tf.shape(deterministic_actions), stddev=target_noise)
            normal_sample = tf.clip_by_value(
                normal_sample, -noise_clip, noise_clip)
        else:
            normal_sample = tf.random_normal(
                tf.shape(deterministic_actions), stddev=act_noise)
        perturbed = deterministic_actions + normal_sample

    stochastic_actions = tf.clip_by_value(perturbed, low_action, high_action)

    use_noise = tf.logical_and(stochastic, not parameter_noise)
    self.actions = tf.cond(
        use_noise,
        lambda: stochastic_actions,
        lambda: deterministic_actions)
def lerp_clip(a, b, t):
    """Linearly interpolate from `a` to `b` by `t`, with `t` clipped to [0, 1]."""
    with tf.name_scope('LerpClip'):
        fraction = tf.clip_by_value(t, 0.0, 1.0)
        return a + (b - a) * fraction
def window_poly6(r_sqr):
    """Return (1 - r_sqr) cubed, clipped into [0, 1]."""
    cubed = (1 - r_sqr)**3
    return tf.clip_by_value(cubed, 0, 1)
def quantize_grad(op, grad):
    """Pass the incoming gradient through unchanged apart from clipping to [-1, 1].

    `op` is accepted but not used.
    """
    passthrough = tf.identity(grad)
    return tf.clip_by_value(passthrough, -1, 1)
def buildActorNetwork(self, d=128, dv=16, dout=128, nv=8):
    """Build the update and target actor networks plus their training ops.

    Two structurally identical towers are created under the variable scopes
    'update_Actor_network' + self.name and 'target_Actor_network' + self.name.
    Each tower is: a per-agent encoder, two attention-based relation blocks
    over ``self.adj``, and a Normal policy head per agent.

    Fix over the previous version: the target tower hard-coded dv=16,
    dout=128, nv=8 instead of using the arguments. With non-default arguments
    the two towers had differently shaped variables, which the ``tf.assign``
    pairs in ``Actor_network_update`` cannot copy. Both towers now use the
    arguments; behaviour with the defaults is unchanged.

    Args:
        d: accepted for backward compatibility; not used by the graph.
        dv: per-head width of the value/query/key projections.
        dout: output width of each relation block.
        nv: number of attention heads.

    Sets (among others): ``pi``/``action`` and ``pi_old``/``action_old``,
    ``p_e_params``/``p_t_params``, ``p_trainOp`` (one op per agent) and
    ``Actor_network_update``.
    """
    init_w = tf.random_normal_initializer(0., 0.01)
    init_b = tf.constant_initializer(0.01)
    f_dim = 128  # encoder output width per agent

    def dense(units, name, activation=tf.nn.relu, trainable=True):
        """Create a Dense layer with the shared initializers."""
        return tf.layers.Dense(units, activation=activation, kernel_initializer=init_w,
                               bias_initializer=init_b, name=name, trainable=trainable)

    def encode_agents():
        """Encode each agent's slice of state_holder and stack the features."""
        layer1 = dense(512, 'encoder_l1')
        layer2 = dense(f_dim, 'encoder_l2')
        features = []
        for i in range(self.agent_num):
            agent_state = self.state_holder[:, i * self.state_dim:(i + 1) * self.state_dim]
            features.append(layer2(layer1(agent_state)))
        stacked = tf.concat(features, axis=1)
        return tf.reshape(stacked, [-1, f_dim, self.agent_num])

    def relation_block(inputs, prefix):
        """Multi-head attention over each agent's neighbourhood; returns the concatenated outputs."""
        v_layer = dense(dv * nv, prefix + '_v')
        q_layer = dense(dv * nv, prefix + '_q')
        k_layer = dense(dv * nv, prefix + '_k')
        # The output layer deliberately keeps the '<prefix>_k' name it had
        # before, so existing variable names (and checkpoints) are unaffected.
        out_layer = dense(dout, prefix + '_k')

        outputs = []
        for i in range(self.agent_num):
            # Select agent i's neighbours through its adjacency matrix. The
            # same tensor feeds the value, query and key projections.
            adjacency_t = tf.transpose(self.adj[:, i, :, :], [0, 2, 1])
            neighbours = tf.transpose(tf.matmul(inputs, adjacency_t), [0, 2, 1])

            # Layers are called in v, q, k order (the order variables are created).
            v = tf.reshape(v_layer(neighbours), shape=[-1, self.neighbors, nv, dv])
            q = tf.reshape(q_layer(neighbours), shape=[-1, self.neighbors, nv, dv])
            k = tf.reshape(k_layer(neighbours), shape=[-1, self.neighbors, nv, dv])

            v = tf.transpose(v, [0, 2, 1, 3])
            k = tf.transpose(k, [0, 2, 3, 1])
            q = tf.transpose(q, [0, 2, 1, 3])

            # Scaled dot-product attention per head.
            att = tf.nn.softmax(tf.matmul(q, k) / np.sqrt(dv), axis=-1)
            attended = tf.transpose(tf.matmul(att, v), [0, 2, 1, 3])
            attended = tf.reshape(attended, shape=[-1, self.neighbors, dv * nv])

            selected = tf.matmul(self.vecholder, attended)
            outputs.append(out_layer(selected))
        return tf.concat(outputs, axis=1)

    def build_tower(head_suffix, head_trainable):
        """Build encoder, two relation blocks and per-agent Normal policy heads."""
        feature = encode_agents()
        relation_1 = tf.reshape(relation_block(feature, 'relation_l1'),
                                [-1, dv * nv, self.agent_num])
        relation_2 = relation_block(relation_1, 'relation_l2')

        mean_layer = dense(1, 'mean' + head_suffix, activation=None, trainable=head_trainable)
        sigma_layer = dense(1, 'sigma' + head_suffix, activation=None, trainable=head_trainable)
        policies = []
        samples = []
        for i in range(self.agent_num):
            h = tf.concat([feature[:, :, i], relation_1[:, i, :], relation_2[:, i, :]], axis=1)
            # NOTE(review): the sigma head has no activation, so nothing here
            # constrains the scale to be positive.
            dis = tf.distributions.Normal(loc=mean_layer(h), scale=sigma_layer(h))
            policies.append(dis)
            samples.append(tf.squeeze(dis.sample([1])))
        return feature, relation_1, relation_2, mean_layer, sigma_layer, policies, samples

    with tf.variable_scope('update_Actor_network' + self.name):
        (self.feature_a, self.relation_1_a, self.relation_2_a,
         self.action_mean, self.action_sigma,
         self.pi, self.action) = build_tower('', True)

    with tf.variable_scope('target_Actor_network' + self.name):
        (self.feature_a_old, self.relation_1_a_old, self.relation_2_a_old,
         self.action_mean_old, self.action_sigma_old,
         self.pi_old, self.action_old) = build_tower('_old', False)

    # Collected before any optimizer is created. The two lists are later
    # zipped pairwise, which relies on both towers creating their variables
    # in the same order.
    self.p_e_params = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                        scope='update_Actor_network' + self.name)
    self.p_t_params = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                        scope='target_Actor_network' + self.name)

    # Clipped-surrogate policy loss, one train op per agent.
    # NOTE: surrogate / clip_surrogate / p_loss are overwritten on every
    # iteration, so after construction they refer to the last agent only.
    self.p_trainOp = []
    for i in range(self.agent_num):
        new_log_prob = tf.reshape(self.pi[i].log_prob(self.action_holder[:, i]), [-1, 1])
        old_log_prob = tf.reshape(
            tf.clip_by_value(self.pi_old[i].log_prob(self.action_holder[:, i]), -20, 20),
            [-1, 1])
        ratio = tf.exp(new_log_prob - old_log_prob)
        self.surrogate = ratio * self.advantage[:, i]
        self.clip_surrogate = tf.clip_by_value(
            ratio, 1. - self.epsilon_holder, 1 + self.epsilon_holder) * self.advantage[:, i]
        self.p_loss = -tf.reduce_mean(tf.minimum(self.surrogate, self.clip_surrogate))

        grads, _ = tf.clip_by_global_norm(tf.gradients(self.p_loss, self.p_e_params), 5.)
        grads_and_vars = list(zip(grads, self.p_e_params))
        self.p_trainOp.append(
            tf.train.AdamOptimizer(learning_rate=0.0001).apply_gradients(
                grads_and_vars, name="apply_gradients"))

    # Ops that copy the update network's variables into the target network.
    self.Actor_network_update = [tf.assign(tar, eva)
                                 for tar, eva in zip(self.p_t_params, self.p_e_params)]
def _init(self, ob_space, ac_space, hid_size, num_hid_layers, gaussian_fixed_var=True):
    """Build the value ('vf') and policy ('pol') MLPs and the ``_act`` function.

    Observations are normalised with a running mean/std and clipped to
    [-5, 5]. Both networks are tanh MLPs with ``num_hid_layers`` layers of
    ``hid_size`` units. ``self._act`` maps (stochastic, ob) to the action,
    the value prediction and exp(logp(action)).
    """
    assert isinstance(ob_space, gym.spaces.Box)

    # Add the variable to track layers
    self.num_hid_layers = num_hid_layers
    self.pdtype = pdtype = make_pdtype(ac_space)
    sequence_length = None

    ob = U.get_placeholder(
        name="ob", dtype=tf.float32, shape=[sequence_length] + list(ob_space.shape))

    def tanh_stack(inputs):
        """Apply the hidden tanh layers named fc1..fcN in the current scope."""
        hidden = inputs
        for layer_number in range(1, num_hid_layers + 1):
            hidden = tf.nn.tanh(
                tf.layers.dense(
                    hidden,
                    hid_size,
                    name="fc%i" % layer_number,
                    kernel_initializer=U.normc_initializer(1.0)))
        return hidden

    with tf.variable_scope("obfilter"):
        self.ob_rms = RunningMeanStd(shape=ob_space.shape)

    with tf.variable_scope('vf'):
        obz = tf.clip_by_value((ob - self.ob_rms.mean) / self.ob_rms.std, -5.0, 5.0)
        value_hidden = tanh_stack(obz)
        self.vpred = tf.layers.dense(
            value_hidden, 1, name='final',
            kernel_initializer=U.normc_initializer(0.1))[:, 0]

    with tf.variable_scope('pol'):
        policy_hidden = tanh_stack(obz)
        if gaussian_fixed_var and isinstance(ac_space, gym.spaces.Box):
            # State-independent log-std: a single variable broadcast over the
            # batch via `mean * 0.0 + logstd`.
            mean = tf.layers.dense(
                policy_hidden,
                pdtype.param_shape()[0] // 2,
                name='final',
                kernel_initializer=U.normc_initializer(0.01))
            logstd = tf.get_variable(
                name="logstd",
                shape=[1, pdtype.param_shape()[0] // 2],
                initializer=tf.zeros_initializer())
            pdparam = tf.concat([mean, mean * 0.0 + logstd], axis=1)
        else:
            pdparam = tf.layers.dense(
                policy_hidden,
                pdtype.param_shape()[0],
                name='final',
                kernel_initializer=U.normc_initializer(0.01))
        # NOTE(review): the flattened source does not show whether this clip
        # belongs to the `else` branch only or to both branches; it is applied
        # to both here - confirm against the original layout.
        pdparam = tf.clip_by_value(pdparam, -5.0, 5.0)

    self.pd = pdtype.pdfromflat(pdparam)

    self.state_in = []
    self.state_out = []

    stochastic = tf.placeholder(dtype=tf.bool, shape=())
    ac = U.switch(stochastic, self.pd.sample(), self.pd.mode())
    self._act = U.function(
        [stochastic, ob], [ac, self.vpred, tf.exp(self.pd.logp(ac))])
# Mask built from the target lengths, used to weight the sequence loss.
masks = tf.sequence_mask(
    target_sequence_length,
    max_target_sequence_length,
    dtype=tf.float32,
    name='masks')

with tf.name_scope("optimization"):
    # Loss function
    cost = tf.contrib.seq2seq.sequence_loss(training_logits, targets, masks)

    # Optimizer
    optimizer = tf.train.AdamOptimizer(lr)

    # Gradient clipping: every available gradient is clipped to [-1, 1];
    # variables without a gradient are skipped.
    gradients = optimizer.compute_gradients(cost)
    capped_gradients = []
    for grad, var in gradients:
        if grad is None:
            continue
        capped_gradients.append((tf.clip_by_value(grad, -1., 1.), var))
    train_op = optimizer.apply_gradients(capped_gradients)


# Batch and pad the source and target sequences

# In[26]:

""" DON'T MODIFY ANYTHING IN THIS CELL """
def pad_sentence_batch(sentence_batch, pad_int):
    """Pad sentences with <PAD> so that each sentence of a batch has the same length"""
    longest = max(len(sentence) for sentence in sentence_batch)
    padded_batch = []
    for sentence in sentence_batch:
        missing = longest - len(sentence)
        padded_batch.append(sentence + [pad_int] * missing)
    return padded_batch
def _clip_and_normalize(word_probs, epsilon):
    '''
    Clip probabilities into [epsilon, 1 - epsilon], then renormalise.

    word_probs: 1D tensor of [vsize]
    '''
    clipped = tf.clip_by_value(word_probs, epsilon, 1.0 - epsilon)
    # scale preds so that the class probas of each sample sum to 1
    total = tf.reduce_sum(clipped, axis=-1, keep_dims=True)
    return clipped / total
def get_dataset(args, dataset, split, batch_size, limit, augment=False, normal_class=-1, outliers=False, add_obs_noise=False, add_iso_noise=False):
    # Build an input pipeline for `dataset` and return
    # (ds, iterator, iterator_init_op, get_next).
    #
    # The pipeline loads images (or generates uniform noise), optionally
    # filters by label, truncates to a whole number of batches, optionally
    # adds noise, resizes / converts colour to match `args.shape` and
    # `args.color`, scales by 1/255, transposes each image with [2, 0, 1],
    # then batches, repeats and prefetches.
    #
    # NOTE(review): the source was flattened, so the nesting of a few
    # statements below is reconstructed and marked where uncertain.

    # Map short dataset names onto the names passed to tfds.load.
    if dataset == 'emnist-letters':
        dataset = 'emnist/letters'
    elif dataset == 'imagenet':
        dataset = 'downsampled_imagenet/32x32'
        # A TEST split request is redirected to VALIDATION for this dataset.
        if split == tfds.Split.TEST:
            split = tfds.Split.VALIDATION
    if dataset == 'uniform-noise':
        # Endless stream of random 28x28x1 integer images
        # (np.random.randint excludes `high`, so values are 0..254).
        def random_uniform_generator():
            while True:
                yield {
                    'image': np.random.randint(0, high=255, size=(28, 28, 1))
                }
        ds = tf.data.Dataset.from_generator(
            random_uniform_generator,
            output_types={'image': tf.int32},
            output_shapes={'image': (28, 28, 1)})
    else:
        ds = tfds.load(name=dataset, split=split)
    # NOTE(review): assumed to apply to both branches above - confirm.
    if split == tfds.Split.TRAIN:
        ds = ds.shuffle(100000)
    # Keep only the normal class, or everything except it when `outliers`.
    if normal_class != -1:
        if outliers:
            ds = ds.filter(lambda x: tf.not_equal(x['label'], normal_class))
        else:
            ds = ds.filter(lambda x: tf.equal(x['label'], normal_class))
    # Truncate to a multiple of batch_size, keep only the image, cast to float.
    ds = ds.take((limit // batch_size) * batch_size) \
        .map(lambda x: x['image']) \
        .map(lambda x: tf.cast(x, tf.float32))
    # Uniform noise in [0, 1) is added before the /255 scaling.
    if add_obs_noise:
        if dataset == 'downsampled_imagenet/32x32':
            ds = ds.map(lambda x: x + tf.random.uniform([32, 32, 3]))
        else:
            ds = ds.map(lambda x: x + tf.random.uniform(x.shape))
    image_width = ds.output_shapes[0].value
    image_height = ds.output_shapes[1].value
    image_channels = ds.output_shapes[2].value
    if image_width != args.shape[0] or image_height != args.shape[1]:
        print('Resize (crop/pad) images to taget shape.')
        ds = ds.map(lambda x: tf.image.resize_image_with_crop_or_pad(
            x, args.shape[0], args.shape[1]))
    if image_channels != 3 and args.color:
        print('Transform grayscale images to rgb.')
        ds = ds.map(lambda x: tf.image.grayscale_to_rgb(x))
    elif image_channels != 1 and not args.color:
        print('Transform rgb images to grayscale.')
        ds = ds.map(lambda x: tf.image.rgb_to_grayscale(x))
    ds = ds.map(lambda x: x / 255.)
    if add_iso_noise:
        # NOTE(review): assumed that the noise and the [0, 1] clip are applied
        # only for the TRAIN split, together with the message - confirm.
        if split == tfds.Split.TRAIN:
            print("Adding iso noise to train of {}.".format(dataset))
            ds = ds.map(lambda x: x + tf.random.normal(x.shape, stddev=.25))
            ds = ds.map(lambda x: tf.clip_by_value(x, 0, 1))
    if augment:
        ds = ds.map(lambda x: augment_transforms(x)) \
            .map(lambda x: tf.clip_by_value(x, -1, 1))
    # Transpose each image with perm [2, 0, 1], then batch / repeat / prefetch.
    ds = ds.map(lambda x: tf.transpose(x, [2, 0, 1])) \
        .batch(batch_size) \
        .repeat() \
        .prefetch(2)
    iterator = ds.make_initializable_iterator()
    iterator_init_op = iterator.initializer
    get_next = iterator.get_next()
    return ds, iterator, iterator_init_op, get_next
def distort(rgb, bitmap):
    """Randomly jitter brightness and contrast of `rgb`; `bitmap` is returned untouched.

    The jittered image is clipped to [-1, 1].
    """
    jittered = tf.image.random_brightness(rgb, 0.1)
    jittered = tf.image.random_contrast(jittered, 0.9, 1.1)
    # tf.image.per_image_standardization was tried here ("works great") but
    # left out because it was unclear how to apply it at predict time.
    jittered = tf.clip_by_value(jittered, clip_value_min=-1.0, clip_value_max=1.0)
    return jittered, bitmap
def _project_perturbation(perturbation, epsilon, input_image, image_bounds):
    """Project `perturbation` onto L-infinity ball of radius `epsilon`.

    The perturbed image is also clipped to `image_bounds` (low, high); the
    returned perturbation is the difference to `input_image` after both clips.
    """
    lower_bound, upper_bound = image_bounds[0], image_bounds[1]
    bounded = tf.clip_by_value(perturbation, -epsilon, epsilon)
    valid_image = tf.clip_by_value(input_image + bounded, lower_bound, upper_bound)
    return valid_image - input_image
def choose_action(self, obs_list):
    """Return the parent's action plus Gaussian noise (stddev ``self.noise``), clipped to [-1, 1]."""
    base_action = super().choose_action(obs_list)
    jitter = tf.random.normal(tf.shape(base_action), stddev=self.noise)
    return tf.clip_by_value(base_action + jitter, -1., 1.)
def _build_model(self):
    """Build the importance-weighted autoencoder graph, loss, train op and summaries.

    Fixes over the previous version:
      * The log importance weight is log p(x|z) + log p(z) - log q(z|x).
        The prior/posterior term (`neg_kld`) was subtracted; it is now added.
      * `tf.tile(self.x, [n_samples, 1])` lays rows out sample-major, so the
        per-row terms are reshaped to [n_samples, batch_size] and reduced
        over axis 0. The old [batch_size, n_samples] reshape combined rows of
        different data points whenever n_samples > 1.
      * A manually built gradient list was passed to
        `optimizer.apply_gradients` and the result thrown away, so it never
        affected training. That dead sub-graph is removed; `train_op` is, as
        before, `optimizer.minimize(self.loss)`.

    Sets: x, lr, p_z, q_z, out_dbn, elbo, loss, train_op, z, z_pl, sample, merged.
    """
    # input points
    self.x = tf.placeholder(tf.float32, shape=[self.batch_size, int(np.prod(self.x_dims))], name="X")
    # Each input row is repeated n_samples times (whole batch after whole batch).
    x = tf.tile(self.x, multiples=[self.n_samples, 1])
    self.lr = tf.placeholder(tf.float32, shape=(), name="lr")

    total_rows = self.batch_size * self.n_samples
    self.p_z = dbns.Normal(loc=tf.zeros(shape=[total_rows, self.z_dim]),
                           scale=tf.ones(shape=[total_rows, self.z_dim]))

    # encoder: first z_dim columns are log-sigma, remaining columns the mean
    z_params = self.encoder(x)
    z_mu = z_params[:, self.z_dim:]
    z_sigma = tf.exp(z_params[:, :self.z_dim])
    self.q_z = dbns.Normal(loc=z_mu, scale=z_sigma)

    # reparameterised sample from q(z|x)
    z = z_mu + tf.multiply(z_sigma, self.p_z.sample())

    x_hat = self.decoder(z)
    # NOTE(review): x_hat is passed as logits here but used as a probability
    # in the likelihood below - confirm what the decoder returns.
    self.out_dbn = dbns.Bernoulli(logits=x_hat)

    log_lik = tf.reduce_sum(x * tf.log(1e-8 + x_hat) + (1 - x) * tf.log(1e-8 + 1 - x_hat), 1)
    # Single-sample estimate of log p(z) - log q(z|x).
    neg_kld = tf.reduce_sum(self.p_z.log_prob(z) - self.q_z.log_prob(z), 1)

    # Importance weights via the log-sum-exp trick. Rows are sample-major
    # (see tf.tile above), hence the [n_samples, batch_size] layout and the
    # reductions over axis 0.
    log_iws = tf.reshape(log_lik + neg_kld, [self.n_samples, self.batch_size])
    max_log_iws = tf.reduce_max(log_iws, axis=0, keepdims=True)
    log_iws -= max_log_iws

    self.elbo = tf.reduce_mean(max_log_iws + tf.log(1e-8 + tf.reduce_mean(
        tf.exp(log_iws), axis=0, keepdims=True)))
    self.loss = -self.elbo

    # for now, hardcoding the Adam optimizer parameters used in the paper
    optimizer = tf.train.AdamOptimizer(learning_rate=self.lr, beta1=0.9, beta2=0.999, epsilon=0.0001)
    self.train_op = optimizer.minimize(self.loss)

    # for sampling
    self.z = self.encoder(self.x, trainable=False, reuse=True)
    self.z_pl = tf.placeholder(tf.float32, shape=[None, self.z_dim])
    self.sample = self.decoder(self.z_pl, trainable=False, reuse=True)

    # tensorboard summaries
    x_img = tf.reshape(x, [-1] + self.x_dims)
    tf.summary.image('data', x_img)
    sample_img = tf.reshape(x_hat, [-1] + self.x_dims)
    tf.summary.image('samples', sample_img)
    tf.summary.scalar('log_lik', tf.reduce_mean(log_lik))
    tf.summary.scalar('neg_kld', tf.reduce_mean(neg_kld))
    tf.summary.scalar('loss', self.loss)
    tf.summary.scalar('elbo', self.elbo)

    self.merged = tf.summary.merge_all()
def _interpolate(im, x, y, out_size):
    """Sample `im` at coordinates (x, y) using four-neighbour weighting.

    `x` and `y` are scaled from [-1, 1] to [0, width/height]. For each
    coordinate the four surrounding pixels are gathered from the flattened
    image and combined with weights derived from the (clipped) corner
    positions. `out_size` is (out_height, out_width).
    """
    with tf.variable_scope('_interpolate'):
        # constants
        num_batch = tf.shape(im)[0]
        height = tf.shape(im)[1]
        width = tf.shape(im)[2]
        channels = tf.shape(im)[3]

        x = tf.cast(x, 'float32')
        y = tf.cast(y, 'float32')
        height_f = tf.cast(height, 'float32')
        width_f = tf.cast(width, 'float32')
        out_height, out_width = out_size[0], out_size[1]
        lowest = tf.zeros([], dtype='int32')
        highest_y = tf.cast(tf.shape(im)[1] - 1, 'int32')
        highest_x = tf.cast(tf.shape(im)[2] - 1, 'int32')

        # scale indices from [-1, 1] to [0, width/height]
        x = (x + 1.0)*(width_f) / 2.0
        y = (y + 1.0)*(height_f) / 2.0

        # integer corners around each sample point, clipped to the image
        left = tf.cast(tf.floor(x), 'int32')
        right = left + 1
        top = tf.cast(tf.floor(y), 'int32')
        bottom = top + 1

        left = tf.clip_by_value(left, lowest, highest_x)
        right = tf.clip_by_value(right, lowest, highest_x)
        top = tf.clip_by_value(top, lowest, highest_y)
        bottom = tf.clip_by_value(bottom, lowest, highest_y)

        # flat indices: batch offset + row offset + column
        row_stride = width
        image_stride = width*height
        batch_offset = _repeat(tf.range(num_batch)*image_stride, out_height*out_width)
        top_row = batch_offset + top*row_stride
        bottom_row = batch_offset + bottom*row_stride
        idx_top_left = top_row + left
        idx_bottom_left = bottom_row + left
        idx_top_right = top_row + right
        idx_bottom_right = bottom_row + right

        # use indices to lookup pixels in the flat image and restore
        # channels dim
        flat = tf.reshape(im, tf.pack([-1, channels]))
        flat = tf.cast(flat, 'float32')
        px_top_left = tf.gather(flat, idx_top_left)
        px_bottom_left = tf.gather(flat, idx_bottom_left)
        px_top_right = tf.gather(flat, idx_top_right)
        px_bottom_right = tf.gather(flat, idx_bottom_right)

        # and finally calculate interpolated values
        left_f = tf.cast(left, 'float32')
        right_f = tf.cast(right, 'float32')
        top_f = tf.cast(top, 'float32')
        bottom_f = tf.cast(bottom, 'float32')
        w_top_left = tf.expand_dims(((right_f-x) * (bottom_f-y)), 1)
        w_bottom_left = tf.expand_dims(((right_f-x) * (y-top_f)), 1)
        w_top_right = tf.expand_dims(((x-left_f) * (bottom_f-y)), 1)
        w_bottom_right = tf.expand_dims(((x-left_f) * (y-top_f)), 1)
        return tf.add_n([
            w_top_left*px_top_left,
            w_bottom_left*px_bottom_left,
            w_top_right*px_top_right,
            w_bottom_right*px_bottom_right,
        ])
def hard_sigmoid(self, x):
    """Piecewise-linear sigmoid: (x + 1) / 2 clipped into [0, 1]."""
    shifted = (x + 1.) / 2
    return tf.clip_by_value(shifted, 0, 1)
def _normalize_clip_observation(x, clip_range=(-5.0, 5.0)):
    """Normalise `x` with a fresh RunningMeanStd and clip the result.

    Args:
        x: observation tensor; the running statistics take the shape
            ``x.shape[1:]``.
        clip_range: pair of bounds. Order does not matter because the clip
            uses ``min(clip_range)`` and ``max(clip_range)``. The default is
            now an immutable tuple rather than a shared list; callers that
            pass a list are unaffected.

    Returns:
        (norm_x, rms): the normalised, clipped tensor and the RunningMeanStd
        instance that holds the statistics.
    """
    rms = RunningMeanStd(shape=x.shape[1:])
    lower, upper = min(clip_range), max(clip_range)
    norm_x = tf.clip_by_value((x - rms.mean) / rms.std, lower, upper)
    return norm_x, rms
def call(self, inputs):
    """Pool `inputs` over TIME_AXIS into mean and standard deviation.

    The mean and the (biased) standard deviation are computed along
    TIME_AXIS, that axis is removed, and the two results are concatenated
    along the axis that now sits at index TIME_AXIS.
    """
    mean_kept = tf.math.reduce_mean(inputs, axis=TIME_AXIS, keepdims=True)
    squared_dev = tf.math.square(inputs - mean_kept)
    variances = tf.math.reduce_mean(squared_dev, axis=TIME_AXIS)
    means = tf.squeeze(mean_kept, TIME_AXIS)
    # Clip at zero from below before taking the square root.
    non_negative = tf.clip_by_value(variances, 0, variances.dtype.max)
    stddevs = tf.math.sqrt(non_negative)
    return tf.concat((means, stddevs), axis=TIME_AXIS)
def add_image_summaries(images: tf.Tensor, labels: tf.Tensor, preds: tf.Tensor,
                        locs: tf.Tensor, k: int = 1) -> tf.Tensor:
    '''Builds image summaries for the k best and k worst predictions of a batch.

    "Best" and "worst" are ranked by |preds - labels|. Every selected image is
    annotated with its (lat, lon), label and prediction.

    Args
    - images: tf.Tensor, shape [batch_size, H, W, C], type float32
        - C must be either 3 (RGB order), or 1 (grayscale)
        - already standardized (relative to entire dataset) with mean 0, std 1
    - labels: tf.Tensor, shape [batch_size]
    - preds: tf.Tensor, shape [batch_size]
    - locs: tf.Tensor, shape [batch_size, 2], each row is [lat, lon]
    - k: int, number of best and worst images to show per batch

    Returns: tf.summary, merged summaries
    '''
    # tf.summary.image rescales float tensors so min/max become 0/255. To use
    # our own scaling (-3 / +3 std. dev. -> 0 / 255) the images are converted
    # to tf.uint8 up front.
    scaled = (images / 6.0 + 0.5) * 255
    images = tf.cast(
        tf.clip_by_value(scaled, clip_value_min=0, clip_value_max=255),
        tf.uint8)

    def write_on_imgs(imgs: np.ndarray, locs: np.ndarray, labels: np.ndarray,
                      preds: np.ndarray) -> np.ndarray:
        '''Draws white text on a black box in the top-left corner of each image.

        Args
        - imgs: np.array, shape [num_imgs, H, W, C], type uint8
            C must be either 1 or 3
        - locs: np.array, shape [num_imgs, 2]
        - labels: np.array, shape [num_imgs]
        - preds: np.array, shape [num_imgs]

        Returns
        - new_imgs: np.array, shape [num_imgs, H, W, C]
        '''
        grayscale = imgs.shape[3] == 1
        annotated = np.empty_like(imgs)
        for idx, frame in enumerate(imgs):
            # A single-channel image is handed to PIL as a 2-D [H, W] array.
            pixels = frame[:, :, 0] if grayscale else frame
            canvas = PIL.Image.fromarray(pixels)

            pen = PIL.ImageDraw.Draw(canvas)
            caption = 'loc: ({:.6f}, {:.6f})\nlabel: {:.4f}, pred: {:.4f}'.format(
                locs[idx][0], locs[idx][1], labels[idx], preds[idx])
            extent = pen.textsize(caption)  # (w, h) of the caption
            pen.rectangle(xy=[(0, 0), extent], fill='black')
            pen.text(xy=(0, 0), text=caption, fill='white')

            rendered = np.asarray(canvas)
            if grayscale:
                annotated[idx, :, :, 0] = rendered
            else:
                annotated[idx] = rendered
        return annotated

    abs_error = tf.abs(preds - labels)
    _, worst_indices = tf.nn.top_k(abs_error, k=k)
    _, best_indices = tf.nn.top_k(-1 * abs_error, k=k)

    def annotated_summary(tag, indices, op_name):
        # Gather the selected rows, annotate them in Python, wrap as summary.
        selected = [
            tf.gather(tensor, indices)
            for tensor in [images, locs, labels, preds]
        ]
        drawn = tf.py_func(func=write_on_imgs, inp=selected, Tout=tf.uint8,
                           stateful=False, name=op_name)
        return tf.summary.image(tag, drawn, max_outputs=k)

    worst_img_sum = annotated_summary(
        'worst_images_in_batch', worst_indices, 'write_on_worst_imgs')
    best_img_sum = annotated_summary(
        'best_images_in_batch', best_indices, 'write_on_best_imgs')

    return tf.summary.merge([worst_img_sum, best_img_sum])
# Build the learning rate with tf.train.exponential_decay, which computes
#
#     decayed_learning_rate = learning_rate * decay_rate ^ (global_step / decay_steps)
#
# Because staircase=True the exponent is an integer division, so the learning
# rate is multiplied by 0.96 once every 100 training steps.
#
# Arguments of tf.train.exponential_decay:
#   learning_rate: the initial learning rate, chosen in advance (0.1 here).
#   global_step:   the current training step counter.
#   decay_steps:   number of steps per decay period (100 here).
#   decay_rate:    the multiplicative decay factor (0.96 here).
#   staircase:     defaults to False; when True the learning rate becomes a
#                  step function.
#   name:          optional name of the operation, defaults to
#                  'ExponentialDecay'.
learning_rate = tf.train.exponential_decay(learning_rate=0.1,
                                           global_step=global_step,
                                           decay_steps=100,
                                           decay_rate=0.96,
                                           staircase=True)

# Loss function. The prediction is clipped into [1e-10, 1.0] so that
# log(0) is never evaluated.
cross_entropy = -tf.reduce_mean(y_ * tf.log(tf.clip_by_value(y, 1e-10, 1.0)))

# Train with the decayed learning rate. Passing global_step to minimize()
# makes the op increment global_step on every run, which in turn updates the
# learning rate. The op is bound to a name so that it can actually be run.
train_step = tf.train.GradientDescentOptimizer(
    learning_rate=learning_rate).minimize(cross_entropy,
                                          global_step=global_step)
import numpy as np
import tensorflow as tf
import vgg16
from scipy.misc import imread, imresize
import matplotlib.pyplot as plt
import matplotlib.image as image
from tqdm import tqdm


def _load_image_batch(path):
    """Read `path` as RGB, resize to 224x224 and add a leading batch axis."""
    pixels = imread(path, mode='RGB')
    pixels = imresize(pixels, (224, 224))
    return np.reshape(pixels, [1, 224, 224, 3])


sess = tf.Session()

# The image being optimised: a trainable variable initialised with small
# truncated-normal noise. The network is fed a copy clipped into [0, 255].
opt_img = tf.Variable(
    tf.truncated_normal([1, 224, 224, 3], dtype=tf.float32, stddev=1e-1),
    name='opt_img')
tmp_img = tf.clip_by_value(opt_img, 0.0, 255.0)

vgg = vgg16.vgg16(tmp_img, 'vgg16_weights.npz', sess)

# Style and content images, each shaped [1, 224, 224, 3].
style_img = _load_image_batch('style.png')
content_img = _load_image_batch('content.png')

# Names of the convolution layers, grouped by block.
layers = [
    'conv1_1', 'conv1_2',
    'conv2_1', 'conv2_2',
    'conv3_1', 'conv3_2', 'conv3_3',
    'conv4_1', 'conv4_2', 'conv4_3',
    'conv5_1', 'conv5_2', 'conv5_3',
]
def _setup_model(self, rank, memory_size, alpha, obs_space, action_space,
                 full_state_space, noise_target_action, **kwargs):
    """Build the graph, session, replay memory, losses, optimizers and summaries.

    Everything is created inside a fresh `tf.Graph` stored on `self.graph`.

    Args:
        rank: Used only to build the TensorBoard log directory name.
        memory_size: Capacity of the replay memory.
        alpha: Passed to `PrioritizedMemory` when prioritised replay is on.
        obs_space: Space whose `.shape` defines the observation placeholders.
        action_space: Space whose `.shape` defines the action placeholder.
        full_state_space: Space whose `.shape` defines the full-state
            placeholders; `shape[0]` is passed to the actor as
            `full_state_dim`.
        noise_target_action: When truthy, clipped Gaussian noise is added to
            the target action before it is fed to the target critics.
        **kwargs: Accepted and ignored.
    """
    self.graph = tf.Graph()
    with self.graph.as_default():
        self.sess = tf_util.single_threaded_session(graph=self.graph)

        if self.use_prioritiy:
            from algorithm.priority_memory import PrioritizedMemory
            self.memory = PrioritizedMemory(capacity=memory_size, alpha=alpha)
        else:
            from algorithm.memory import Memory
            self.memory = Memory(limit=memory_size,
                                 action_shape=action_space.shape,
                                 observation_shape=obs_space.shape,
                                 full_state_shape=full_state_space.shape)

        # Define placeholders.
        self.observe_Input = tf.placeholder(tf.float32,
                                            [None] + list(obs_space.shape),
                                            name='observe_Input')
        self.observe_Input_ = tf.placeholder(tf.float32,
                                             [None] + list(obs_space.shape),
                                             name='observe_Input_')
        self.f_s = tf.placeholder(tf.float32,
                                  [None] + list(full_state_space.shape),
                                  name='full_state_Input')
        self.f_s_ = tf.placeholder(tf.float32,
                                   [None] + list(full_state_space.shape),
                                   name='full_state_Input_')
        self.R = tf.placeholder(tf.float32, [None, 1], 'r')
        self.terminals1 = tf.placeholder(tf.float32,
                                         shape=(None, 1),
                                         name='terminals1')
        self.ISWeights = tf.placeholder(tf.float32, [None, 1],
                                        name='IS_weights')
        self.n_step_steps = tf.placeholder(tf.float32,
                                           shape=(None, 1),
                                           name='n_step_reached')
        self.q_demo = tf.placeholder(tf.float32, [None, 1],
                                     name='Q_of_actions_from_memory')
        self.come_from_demo = tf.placeholder(tf.float32, [None, 1],
                                             name='Demo_index')
        self.action_memory = tf.placeholder(tf.float32,
                                            [None] + list(action_space.shape),
                                            name='actions_from_memory')

        # Running statistics used to normalise observations / full states.
        with tf.variable_scope('obs_rms'):
            self.obs_rms = RunningMeanStd(shape=obs_space.shape)

        with tf.variable_scope('state_rms'):
            self.state_rms = RunningMeanStd(shape=full_state_space.shape)

        # Observations are normalised and clipped into [-5, 5]; full states
        # are normalised only.
        with tf.name_scope('obs_preprocess'):
            self.normalized_observe_Input = tf.clip_by_value(
                normalize(self.observe_Input, self.obs_rms), -5., 5.)
            self.normalized_observe_Input_ = tf.clip_by_value(
                normalize(self.observe_Input_, self.obs_rms), -5., 5.)

        with tf.name_scope('state_preprocess'):
            self.normalized_f_s0 = normalize(self.f_s, self.state_rms)
            self.normalized_f_s1 = normalize(self.f_s_, self.state_rms)

        with tf.variable_scope('Actor'):
            self.action, f_s_predict = self.build_actor(
                self.normalized_observe_Input,
                scope='eval',
                trainable=True,
                full_state_dim=full_state_space.shape[0])
            self.action_, _ = self.build_actor(
                self.normalized_observe_Input_,
                scope='target',
                trainable=False,
                full_state_dim=full_state_space.shape[0])

        # Target policy smoothing, by adding clipped noise to target actions
        if noise_target_action:
            epsilon = tf.random_normal(tf.shape(self.action_), stddev=0.007)
            epsilon = tf.clip_by_value(epsilon, -0.01, 0.01)
            a2 = self.action_ + epsilon
            noised_action_ = tf.clip_by_value(a2, -1, 1)
        else:
            noised_action_ = self.action_

        with tf.variable_scope('Critic'):
            # The online Q values are clipped to guard against
            # over-estimation.
            self.q_1 = tf.clip_by_value(
                self.build_critic(self.normalized_f_s0,
                                  self.action,
                                  scope='eval_1',
                                  trainable=True), self.Q_value_range[0],
                self.Q_value_range[1])

            q_1_ = self.build_critic(self.normalized_f_s1,
                                     noised_action_,
                                     scope='target_1',
                                     trainable=False)

            if self.use_TD3:
                q_2 = tf.clip_by_value(
                    self.build_critic(self.normalized_f_s0,
                                      self.action,
                                      scope='eval_2',
                                      trainable=True), self.Q_value_range[0],
                    self.Q_value_range[1])

                q_2_ = self.build_critic(self.normalized_f_s1,
                                         noised_action_,
                                         scope='target_2',
                                         trainable=False)

        # Collect networks parameters. It would make it more easily to manage them.
        self.ae_params = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                           scope='Actor/eval')
        self.at_params = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                           scope='Actor/target')
        self.ce1_params = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                            scope='Critic/eval_1')
        self.ct1_params = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                            scope='Critic/target_1')

        if self.use_TD3:
            self.ce2_params = tf.get_collection(
                tf.GraphKeys.GLOBAL_VARIABLES, scope='Critic/eval_2')
            self.ct2_params = tf.get_collection(
                tf.GraphKeys.GLOBAL_VARIABLES, scope='Critic/target_2')

        # Soft target updates: target <- (1 - TAU) * target + TAU * online.
        with tf.variable_scope('Soft_Update'):
            self.soft_replace_a = [
                tf.assign(t, (1 - TAU) * t + TAU * e)
                for t, e in zip(self.at_params, self.ae_params)
            ]
            self.soft_replace_c = [
                tf.assign(t, (1 - TAU) * t + TAU * e)
                for t, e in zip(self.ct1_params, self.ce1_params)
            ]
            if self.use_TD3:
                self.soft_replace_c += [
                    tf.assign(t, (1 - TAU) * t + TAU * e)
                    for t, e in zip(self.ct2_params, self.ce2_params)
                ]

        # Critic loss = one-step TD loss + n-step TD loss + L2 penalty on the
        # online critic.
        # TD3: there are four critics in total (two online, two target), so
        # two critic losses are computed.
        with tf.variable_scope('Critic_Lose'):
            if self.use_TD3:
                min_q_ = tf.minimum(q_1_, q_2_)
            else:
                min_q_ = q_1_

            self.q_target = self.R + (1. - self.terminals1) * GAMMA * min_q_
            if self.use_n_step:
                self.n_step_target_q = self.R + (
                    1. - self.terminals1) * tf.pow(
                        GAMMA, self.n_step_steps) * min_q_
                cliped_n_step_target_q = tf.clip_by_value(
                    self.n_step_target_q, self.Q_value_range[0],
                    self.Q_value_range[1])

            cliped_q_target = tf.clip_by_value(self.q_target,
                                               self.Q_value_range[0],
                                               self.Q_value_range[1])

            self.td_error_1 = tf.abs(cliped_q_target - self.q_1)
            if self.use_TD3:
                self.td_error_2 = tf.abs(cliped_q_target - q_2)

            if self.use_n_step:
                self.nstep_td_error_1 = tf.abs(cliped_n_step_target_q -
                                               self.q_1)
                if self.use_TD3:
                    self.nstep_td_error_2 = tf.abs(cliped_n_step_target_q -
                                                   q_2)

            L2_regular_1 = tf.contrib.layers.apply_regularization(
                tf.contrib.layers.l2_regularizer(0.001),
                weights_list=self.ce1_params)
            if self.use_TD3:
                L2_regular_2 = tf.contrib.layers.apply_regularization(
                    tf.contrib.layers.l2_regularizer(0.001),
                    weights_list=self.ce2_params)

            # TD losses are weighted per sample by the importance-sampling
            # weights fed through self.ISWeights.
            one_step_losse_1 = tf.reduce_mean(
                tf.multiply(self.ISWeights, tf.square(
                    self.td_error_1))) * self.lambda_1_step
            if self.use_TD3:
                one_step_losse_2 = tf.reduce_mean(
                    tf.multiply(self.ISWeights, tf.square(
                        self.td_error_2))) * self.lambda_1_step

            if self.use_n_step:
                n_step_td_losses_1 = tf.reduce_mean(
                    tf.multiply(
                        self.ISWeights, tf.square(
                            self.nstep_td_error_1))) * self.lambda_n_step
                c_loss_1 = one_step_losse_1 + n_step_td_losses_1 + L2_regular_1

                if self.use_TD3:
                    n_step_td_losses_2 = tf.reduce_mean(
                        tf.multiply(self.ISWeights,
                                    tf.square(self.nstep_td_error_2))
                    ) * self.lambda_n_step
                    c_loss_2 = one_step_losse_2 + n_step_td_losses_2 + L2_regular_2
            else:
                c_loss_1 = one_step_losse_1 + L2_regular_1

                if self.use_TD3:
                    c_loss_2 = one_step_losse_2 + L2_regular_2

        # Actor loss: maximise q(s, a) and minimise the behaviour-cloning
        # error. The cloning error only applies to demo transitions whose
        # demo action has a higher Q value than q_1(s, a) of the action
        # produced by the actor.
        with tf.variable_scope('Actor_lose'):
            Is_worse_than_demo = self.q_1 < self.q_demo
            Is_worse_than_demo = tf.cast(Is_worse_than_demo, tf.float32)
            # Number of samples where the actor is worse than the demo. Kept
            # as int32: int8 cannot represent counts above 127.
            worse_than_demo = tf.cast(tf.reduce_sum(Is_worse_than_demo),
                                      tf.int32)

            # The action error is a sum of squares; a mean (reduce_mean)
            # would work as well. The actions here are small numbers anyway.
            action_diffs = Is_worse_than_demo * tf.reduce_sum(
                self.come_from_demo *
                tf.square(self.action - self.action_memory),
                1,
                keepdims=True)

            L_BC = self.LAMBDA_BC * tf.reduce_sum(action_diffs)
            # Auxiliary task: the actor also predicts the full state.
            auxiliary_predict_loss = self.LAMBDA_predict * tf.reduce_mean(
                tf.square(f_s_predict - self.f_s))
            a_loss = -tf.reduce_mean(
                self.q_1) + L_BC + auxiliary_predict_loss

        # Setting optimizer for Actor and Critic
        with tf.variable_scope('Critic_Optimizer'):
            if self.use_TD3:
                self.critic_grads_1 = tf_util.flatgrad(
                    loss=c_loss_1, var_list=self.ce1_params)
                self.critic_grads_2 = tf_util.flatgrad(
                    loss=c_loss_2, var_list=self.ce2_params)

                self.critic_optimizer_1 = MpiAdam(var_list=self.ce1_params,
                                                  beta1=0.9,
                                                  beta2=0.999,
                                                  epsilon=1e-08)
                self.critic_optimizer_2 = MpiAdam(var_list=self.ce2_params,
                                                  beta1=0.9,
                                                  beta2=0.999,
                                                  epsilon=1e-08)
            else:
                self.critic_grads = tf_util.flatgrad(
                    loss=c_loss_1, var_list=self.ce1_params)
                self.critic_optimizer = MpiAdam(var_list=self.ce1_params,
                                                beta1=0.9,
                                                beta2=0.999,
                                                epsilon=1e-08)

        with tf.variable_scope('Actor_Optimizer'):
            self.actor_grads = tf_util.flatgrad(a_loss, self.ae_params)
            self.actor_optimizer = MpiAdam(var_list=self.ae_params,
                                           beta1=0.9,
                                           beta2=0.999,
                                           epsilon=1e-08)

        with self.sess.as_default():
            self._initialize(self.sess)

        # Model saving: every global variable is handed to the saver.
        var_list = tf.global_variables()
        print(
            "var_list!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n"
        )
        for v in var_list:
            print(v)
        self.saver = tf.train.Saver(var_list=var_list, max_to_keep=1)
        self.writer = tf.summary.FileWriter(
            "logs/" + self.experiment_name + "/DDPG_" + str(rank),
            self.graph)

        # TensorBoard summary
        self.a_summary = tf.summary.merge([
            tf.summary.scalar('a_loss', a_loss, family='actor'),
            tf.summary.scalar('L_BC', L_BC, family='actor'),
            tf.summary.scalar('worse_than_demo',
                              worse_than_demo,
                              family='actor'),
            tf.summary.scalar('auxiliary_predict_loss',
                              auxiliary_predict_loss,
                              family='actor')
        ])

        if self.use_TD3:
            self.c_summary = tf.summary.merge([
                tf.summary.scalar('c_loss_1', c_loss_1, family='critic'),
                tf.summary.scalar('c_loss_2', c_loss_2, family='critic')
            ])
        else:
            self.c_summary = tf.summary.merge(
                [tf.summary.scalar('c_loss_1', c_loss_1, family='critic')])

        # Episode summary placeholders. Each one carries its own graph name
        # so that the nodes can be told apart in the graph.
        self.episode_cumulate_reward = tf.placeholder(
            tf.float32, name='episode_cumulate_reward')
        self.episoed_length = tf.placeholder(
            tf.int16, name='episode_length')
        self.success_or_not = tf.placeholder(
            tf.int8, name='success_or_not')

        self.eval_episode_cumulate_reward = tf.placeholder(
            tf.float32, name='eval_episode_cumulate_reward')
        self.eval_episoed_length = tf.placeholder(
            tf.int16, name='eval_episode_length')
        self.eval_success_or_not = tf.placeholder(
            tf.int8, name='eval_success_or_not')

        self.episode_summary = tf.summary.merge([
            tf.summary.scalar('episode_cumulate_reward',
                              self.episode_cumulate_reward,
                              family='episoed_result'),
            tf.summary.scalar('episoed_length',
                              self.episoed_length,
                              family='episoed_result'),
            tf.summary.scalar('success_or_not',
                              self.success_or_not,
                              family='episoed_result'),
        ])

        self.eval_episode_summary = tf.summary.merge([
            tf.summary.scalar('eval_episode_cumulate_reward',
                              self.eval_episode_cumulate_reward,
                              family='Eval_episoed_result'),
            tf.summary.scalar('eval_episoed_length',
                              self.eval_episoed_length,
                              family='Eval_episoed_result'),
            tf.summary.scalar('eval_success_or_not',
                              self.eval_success_or_not,
                              family='Eval_episoed_result'),
        ])
def __init__(self, session, action_size, width, height, states_size,
             optimizer=tf.train.AdamOptimizer(1e-4), eta=0.5, beta=0.01):
    """Build the policy/value network and its training op.

    Args:
        session: Stored on `self.session`.
        action_size: Number of discrete actions (depth of the one-hot
            encoding of `self.action`).
        width, height, states_size: Forwarded to `self.build_model`.
        optimizer: Optimizer used to apply the clipped gradients.
            NOTE(review): the default instance is created once, when the
            function is defined, and is therefore shared by every instance
            that relies on the default -- confirm this is intended.
        eta: Weight of the value loss in the total loss.
        beta: Weight of the entropy term in the total loss.
    """
    self.session = session
    self.optimizer = optimizer
    self.action_size = action_size
    self.width = width
    self.height = height
    self.states_size = states_size
    self.layers = {}
    # eta scales the value loss relative to the policy loss.
    self.eta = eta
    # beta is the strength of the entropy regularisation; a larger value
    # puts more emphasis on exploration.
    self.beta = beta

    with tf.device('/cpu:0'):
        with tf.variable_scope('network'):
            self.action = tf.placeholder('int32', [None], name='action')
            self.target_value = tf.placeholder('float32', [None],
                                               name='target_value')
            self.state, self.policy, self.value = self.build_model(
                self.width, self.height, self.states_size)
            self.weights = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                             scope='network')
            self.advantages = tf.placeholder('float32', [None],
                                             name='advantages')

        with tf.variable_scope('optimizer'):
            # One-hot encoding of the actions that were taken.
            taken_one_hot = tf.one_hot(self.action, self.action_size, 1.0, 0.0)

            # The log is ill-behaved when a probability is exactly 0 or 1,
            # so the policy is clipped just inside that interval first.
            policy_floor = 1e-8
            policy_ceiling = 1 - policy_floor
            bounded_policy = tf.clip_by_value(self.policy, policy_floor,
                                              policy_ceiling)
            self.log_policy = tf.log(bounded_policy)

            # Log-probability of the action actually taken, per sample.
            masked_log_policy = tf.multiply(self.log_policy, taken_one_hot)
            self.log_pi_for_action = tf.reduce_sum(masked_log_policy, axis=1)

            # Policy-gradient loss: minimising the negative
            # advantage-weighted log-probability performs gradient ascent
            # on the expected return.
            self.policy_loss = -tf.reduce_mean(
                self.log_pi_for_action * self.advantages)

            # Squared difference between the target and predicted value.
            value_error = self.target_value - self.value
            self.value_loss = tf.reduce_mean(tf.square(value_error))

            # Entropy of the policy, summed over all elements. Subtracting
            # it from the loss penalises distributions concentrated on a
            # single action.
            self.entropy = tf.reduce_sum(
                tf.multiply(self.policy, -self.log_policy))

            self.loss = (self.eta * self.value_loss + self.policy_loss
                         - self.entropy * self.beta)

            # Gradients are clipped by global norm to reduce the chance of
            # them exploding, then paired with their variables.
            raw_grads = tf.gradients(self.loss, self.weights)
            clipped_grads, _ = tf.clip_by_global_norm(raw_grads, 40.0)
            grad_var_pairs = list(zip(clipped_grads, self.weights))

            self.train_op = optimizer.apply_gradients(grad_var_pairs)
def surrogate(self):
    """Return the negated clipped-ratio surrogate objective.

    The probability ratio between the current and the old network for the
    fed actions is multiplied by the advantage, once unclipped and once
    clipped into [1 - epsilon, 1 + epsilon]; the element-wise minimum of the
    two is averaged and negated so that it can be minimised.
    """
    new_prob = self.network.mvn.prob(self.action_pl)
    old_prob = self.network_old.mvn.prob(self.action_pl)
    ratio = new_prob / old_prob

    unclipped = ratio * self.adv_pl
    clipped_ratio = tf.clip_by_value(ratio, 1.0 - self.epsilon,
                                     1.0 + self.epsilon)
    clipped = clipped_ratio * self.adv_pl

    return -tf.reduce_mean(tf.minimum(unclipped, clipped))
def _get_target_action(self, vector_input):
    """Return the target actor output and a noisy copy clipped to [-1, 1].

    Returns:
        Tuple `(target_mu, noisy_action)` where `noisy_action` is
        `target_mu + self.action_noise()` clipped into [-1, 1].
    """
    with tf.device(self.device):
        target_mu = self.actor_target_net(vector_input, None)
        perturbed = target_mu + self.action_noise()
        noisy_action = tf.clip_by_value(perturbed, -1, 1)
    return target_mu, noisy_action
def affinity_loss(labels, probs, num_classes, kld_margin):
  """Affinity Field (AFF) loss.

  Steps:
  1) build the ignore map from the labels (ignored pixels and their paired
     pixels on the eight corners of a 3x3 patch);
  2) build the edge map from the one-hot labels;
  3) extract the eight corner predictions around every pixel;
  4) compute the KL-Divergence between each pixel and its eight neighbours,
     and split it into edge and non-edge terms.

  Args:
    labels: Semantic segmentation ground-truth labels. The last axis is
      squeezed away before use.
    probs: A tensor of size [batch_size, height_in, width_in, num_classes],
      indicating segmentation predictions.
    num_classes: A number indicating the total number of valid classes.
    kld_margin: A number indicating the margin for KL-Divergence at edge.

  Returns:
    (edge_loss, not_edge_loss): the hinge term `max(0, kld_margin - KL)`
    gathered at edge positions, and the KL term gathered at non-edge
    positions.
  """
  # Ignore map (e.g. label 255 and the pixels paired with it).
  labels = tf.squeeze(labels, axis=-1)  # NxHxW
  ignored = nnx.ignores_from_label(labels, num_classes, 1)  # NxHxWx8
  valid = tf.expand_dims(tf.logical_not(ignored), axis=3)  # NxHxWx1x8

  # Edge map.
  one_hot_lab = tf.one_hot(labels, depth=num_classes)
  is_edge = nnx.edges_from_label(one_hot_lab, 1, 255)  # NxHxWxCx8

  # Drop ignored pixels from both the edge and the non-edge set.
  is_edge = tf.logical_and(is_edge, valid)
  is_not_edge = tf.logical_and(tf.logical_not(is_edge), valid)

  edge_indices = tf.where(tf.reshape(is_edge, [-1]))
  not_edge_indices = tf.where(tf.reshape(is_not_edge, [-1]))

  # Paired pixels: the eight corners around each centre pixel.
  paired = nnx.eightcorner_activation(probs, 1)  # NxHxWxCx8
  center = tf.expand_dims(probs, axis=-1)  # NxHxWxCx1

  bot_epsilon = tf.constant(1e-4, name='bot_epsilon')
  top_epsilon = tf.constant(1.0, name='top_epsilon')

  def _bounded(p):
    # Keep probabilities away from zero so that log and division are safe.
    return tf.clip_by_value(p, bot_epsilon, top_epsilon)

  neg_center = _bounded(1 - center)
  center = _bounded(center)
  neg_paired = _bounded(1 - paired)
  paired = _bounded(paired)

  # Binary KL-Divergence between paired and centre predictions.
  kldiv = paired * tf.log(paired / center)
  kldiv = kldiv + neg_paired * tf.log(neg_paired / neg_center)

  hinge = tf.maximum(0.0, kld_margin - kldiv)

  not_edge_loss = tf.gather(tf.reshape(kldiv, [-1]), not_edge_indices)
  edge_loss = tf.gather(tf.reshape(hinge, [-1]), edge_indices)

  return edge_loss, not_edge_loss
def generate_noisy_image(image, noise_ratio):
    """Blend `image` with uniform noise centred on VGG_MEAN_PIXELS.

    Args:
        image: Array-like image; its `.shape` determines the noise shape.
        noise_ratio: Weight of the noise in the blend; the image gets
            `1 - noise_ratio`.

    Returns:
        Tensor holding the blend, clipped into [0, 255].
    """
    jitter = np.random.uniform(-20, 20, image.shape).astype(np.float32)
    noise_image = VGG_MEAN_PIXELS + jitter
    blended = noise_image * noise_ratio + image * (1 - noise_ratio)
    return tf.clip_by_value(blended, 0.0, 255.0)
def train(self, lr=0.0002, epoch=100, schedule=10, resume=True, freeze_encoder=False,
          sample_steps=50, checkpoint_steps=500, clamp=0.001, d_iters=3):
    """Run the adversarial training loop.

    Args:
        lr: Initial learning rate fed to both RMSProp optimizers.
        epoch: Number of passes over the training iterator.
        schedule: The learning rate is halved every `schedule` epochs
            (subject to the floor described below).
        resume: When true, restore the model from the directory returned by
            `self.get_model_id_and_dir()` before training.
        freeze_encoder: Forwarded to `self.retrieve_trainable_vars`.
        sample_steps: Call `self.validate_model` every this many steps.
        checkpoint_steps: Save a checkpoint every this many steps.
        clamp: Discriminator variables are clipped into [-clamp, clamp]
            before every discriminator update.
        d_iters: NOTE(review): not referenced anywhere in this method.

    Raises:
        Exception: If no session is registered on `self.sess`.
    """
    g_vars, d_vars = self.retrieve_trainable_vars(freeze_encoder=freeze_encoder)
    input_handle, loss_handle, _, summary_handle = self.retrieve_handles()

    if not self.sess:
        raise Exception("no session registered")

    # The learning rate is a placeholder so that it can change per step.
    learning_rate = tf.placeholder(tf.float32, name="learning_rate")
    d_optimizer = tf.train.RMSPropOptimizer(learning_rate).minimize(loss_handle.d_loss, var_list=d_vars)
    g_optimizer = tf.train.RMSPropOptimizer(learning_rate).minimize(loss_handle.g_loss, var_list=g_vars)

    # One assign op per discriminator variable, clipping it into
    # [-clamp, clamp].
    cap_d_vars_ops = [val.assign(tf.clip_by_value(val, -clamp, clamp)) for val in d_vars]

    tf.global_variables_initializer().run()
    real_data = input_handle.real_data

    # filter by one type of labels
    data_provider = TrainDataProvider(self.data_dir)
    total_batches = data_provider.compute_total_batch_num(self.batch_size)
    val_batch_iter = data_provider.get_val(size=self.batch_size)

    saver = tf.train.Saver(max_to_keep=3)
    summary_writer = tf.summary.FileWriter(self.log_dir, self.sess.graph)

    if resume:
        _, model_dir = self.get_model_id_and_dir()
        self.restore_model(saver, model_dir)

    current_lr = lr
    counter = 0  # global step count across all epochs
    start_time = time.time()

    for ei in range(epoch):
        train_batch_iter = data_provider.get_train_iter(self.batch_size)

        if (ei + 1) % schedule == 0:
            update_lr = current_lr / 2.0
            # minimum learning rate guarantee
            # NOTE(review): the floor equals the default `lr`, so with the
            # default arguments the rate never actually decreases.
            update_lr = max(update_lr, 0.0002)
            print("decay learning rate from %.5f to %.5f" % (current_lr, update_lr))
            current_lr = update_lr

        for bid, batch in enumerate(train_batch_iter):
            counter += 1
            batch_images = batch
            # Optimize D: clip the discriminator variables first, then take
            # one discriminator step.
            self.sess.run(cap_d_vars_ops)
            _, batch_d_loss, d_loss_real, d_loss_fake, d_summary = self.sess.run([d_optimizer,
                                                                                  loss_handle.d_loss,
                                                                                  loss_handle.d_loss_real,
                                                                                  loss_handle.d_loss_fake,
                                                                                  summary_handle.d_merged],
                                                                                 feed_dict={real_data: batch_images,
                                                                                            learning_rate: current_lr
                                                                                            })
            # Optimize G
            _, batch_g_loss = self.sess.run([g_optimizer, loss_handle.g_loss],
                                            feed_dict={
                                                real_data: batch_images,
                                                learning_rate: current_lr
                                            })
            # magic move to Optimize G again
            # according to https://github.com/carpedm20/DCGAN-tensorflow
            # collect all the losses along the way
            _, batch_g_loss, \
            const_loss, l1_loss, tv_loss, g_summary = self.sess.run([g_optimizer,
                                                                     loss_handle.g_loss,
                                                                     loss_handle.const_loss,
                                                                     loss_handle.l1_loss,
                                                                     loss_handle.tv_loss,
                                                                     summary_handle.g_merged],
                                                                    feed_dict={
                                                                        real_data: batch_images,
                                                                        learning_rate: current_lr
                                                                    })
            passed = time.time() - start_time
            log_format = "Epoch: [%2d], [%4d/%4d] time: %4.4f, d_loss: %.5f, g_loss: %.5f, " + \
                         "const_loss: %.5f, l1_loss: %.5f, tv_loss: %.5f, d_loss_real: %.7f, d_loss_fake: %.7f"
            print(log_format % (ei, bid, total_batches, passed, batch_d_loss, batch_g_loss,
                                const_loss, l1_loss, tv_loss, d_loss_real, d_loss_fake))
            summary_writer.add_summary(d_summary, counter)
            summary_writer.add_summary(g_summary, counter)

            if counter % sample_steps == 0:
                # sample the current model states with val data
                self.validate_model(val_batch_iter, ei, counter)

            if counter % checkpoint_steps == 0:
                print("Checkpoint: save checkpoint step %d" % counter)
                self.checkpoint(saver, counter)

    # Disabled: validation of the final model.
    # print("val.examples len:{}".format(len(data_provider.val.examples)))
    # accuracy = 0.0
    # iters = int(len(data_provider.val.examples) / self.batch_size)
    # for it in range(iters):
    #     val_batch_iter = data_provider.get_val(size=self.batch_size)
    #     accuracy += self.validate_last_model(val_batch_iter)
    #     break
    # accuracy /= iters
    # print("Avg accuracy: %.5f" % accuracy)

    # save the last checkpoint
    print("Checkpoint: last checkpoint step %d" % counter)
    self.checkpoint(saver, counter)
def adaptive_affinity_loss(labels,
                           one_hot_lab,
                           probs,
                           size,
                           num_classes,
                           kld_margin,
                           w_edge,
                           w_not_edge):
  """Adaptive affinity field (AAF) loss.

  Steps:
  1) build the ignore map from the labels (ignored pixels and their paired
     pixels, the eight corners of a (2*size+1)x(2*size+1) patch);
  2) build the edge map from the one-hot labels;
  3) extract the eight corner predictions around every pixel;
  4) compute the KL-Divergence between each pixel and its paired pixels;
  5) weight the edge and non-edge terms with `w_edge` / `w_not_edge`.

  Args:
    labels: Semantic segmentation ground-truth labels. The last axis is
      squeezed away before use.
    one_hot_lab: A tensor of size [batch_size, height_in, width_in,
      num_classes] which is the ground-truth labels in the form of one-hot
      vector.
    probs: A tensor of size [batch_size, height_in, width_in, num_classes],
      indicating segmentation predictions.
    size: A number indicating the half size of a patch.
    num_classes: A number indicating the total number of valid classes.
    kld_margin: A number indicating the margin for KL-Divergence at edge.
    w_edge: Weighting for KL-Divergence at edge. It is multiplied with the
      one-hot labels and summed over the class axis.
    w_not_edge: Weighting for KL-Divergence at non-edge, applied the same
      way as `w_edge`.

  Returns:
    (edge_loss, not_edge_loss): the weighted hinge term
    `max(0, kld_margin - KL)` gathered at edge positions, and the weighted
    KL term gathered at non-edge positions.
  """
  # Ignore map (e.g. label 255 and the pixels paired with it).
  labels = tf.squeeze(labels, axis=-1)  # NxHxW
  ignored = nnx.ignores_from_label(labels, num_classes, size)  # NxHxWx8
  valid = tf.expand_dims(tf.logical_not(ignored), axis=3)  # NxHxWx1x8

  # Edge map.
  is_edge = nnx.edges_from_label(one_hot_lab, size, 255)  # NxHxWxCx8

  # Drop ignored pixels from both the edge and the non-edge set.
  is_edge = tf.logical_and(is_edge, valid)
  is_not_edge = tf.logical_and(tf.logical_not(is_edge), valid)

  edge_indices = tf.where(tf.reshape(is_edge, [-1]))
  not_edge_indices = tf.where(tf.reshape(is_not_edge, [-1]))

  # Paired pixels: the eight corners around each centre pixel.
  paired = nnx.eightcorner_activation(probs, size)  # NxHxWxCx8
  center = tf.expand_dims(probs, axis=-1)  # NxHxWxCx1

  bot_epsilon = tf.constant(1e-4, name='bot_epsilon')
  top_epsilon = tf.constant(1.0, name='top_epsilon')

  def _bounded(p):
    # Keep probabilities away from zero so that log and division are safe.
    return tf.clip_by_value(p, bot_epsilon, top_epsilon)

  neg_center = _bounded(1 - center)
  neg_paired = _bounded(1 - paired)
  center = _bounded(center)
  paired = _bounded(paired)

  # Binary KL-Divergence between paired and centre predictions.
  kldiv = paired * tf.log(paired / center)
  kldiv = kldiv + neg_paired * tf.log(neg_paired / neg_center)
  edge_loss = tf.maximum(0.0, kld_margin - kldiv)
  not_edge_loss = kldiv

  # Per-pixel weights: select the weight of each pixel's ground-truth class.
  one_hot_lab = tf.expand_dims(one_hot_lab, axis=-1)
  w_edge = tf.reduce_sum(w_edge * one_hot_lab, axis=3, keep_dims=True)  # NxHxWx1x1
  w_not_edge = tf.reduce_sum(w_not_edge * one_hot_lab, axis=3, keep_dims=True)  # NxHxWx1x1

  edge_loss = edge_loss * w_edge
  not_edge_loss = not_edge_loss * w_not_edge

  not_edge_loss = tf.gather(tf.reshape(not_edge_loss, [-1]), not_edge_indices)
  edge_loss = tf.gather(tf.reshape(edge_loss, [-1]), edge_indices)

  return edge_loss, not_edge_loss
from __future__ import division, print_function import tensorflow as tf import numpy as np from painter.wct.vgg_normalised import vgg_from_t7 from keras import backend as K from keras.models import Model from keras.layers import Input, UpSampling2D, Lambda from painter.wct.ops import pad_reflect, Conv2DReflect, torch_decay, wct_tf, wct_style_swap, adain from collections import namedtuple ### Helpers ### mse = tf.losses.mean_squared_error clip = lambda x: tf.clip_by_value(x, 0, 1) EncoderDecoder = namedtuple( 'EncoderDecoder', 'content_input content_encoder_model content_encoded \ style_encoded \ decoder_input, decoder_model decoded decoded_encoded \ pixel_loss feature_loss tv_loss total_loss \ train_op learning_rate global_step \ summary_op') ### WCT Model Graph ### class WCTModel(object): '''Model graph for Universal Style Transfer via Feature Transforms from https://arxiv.org/abs/1705.08086''' def __init__(self,
def _step(self) -> Dict[str, tf.Tensor]:
  """Do a step of SGD and update the priorities.

  Returns:
    Dictionary with the scalar training loss under the key 'loss'.
  """
  # Fetch the next sample: transition data plus replay keys/probabilities.
  sample = next(self._iterator)
  transitions: types.Transition = sample.data
  keys, probs = sample.info[:2]

  with tf.GradientTape() as tape:
    # Online network on both observations, target network on the next one.
    q_tm1 = self._network(transitions.observation)
    q_t_value = self._target_network(transitions.next_observation)
    q_t_selector = self._network(transitions.next_observation)

    # Rewards and discounts must share the dtype of the network values;
    # rewards are additionally clipped into [-1, 1].
    value_dtype = q_tm1.dtype
    r_t = tf.clip_by_value(tf.cast(transitions.reward, value_dtype), -1., 1.)
    d_t = tf.cast(transitions.discount, value_dtype) * tf.cast(
        self._discount, value_dtype)

    # Double Q-learning TD error, passed through a Huber loss.
    _, extra = trfl.double_qlearning(q_tm1, transitions.action, r_t, d_t,
                                     q_t_value, q_t_selector)
    per_sample_loss = losses.huber(extra.td_error, self._huber_loss_parameter)

    # Importance weights, normalised by their maximum.
    importance_weights = (1. / probs) ** self._importance_sampling_exponent  # [B]
    importance_weights = importance_weights / tf.reduce_max(importance_weights)

    # Reweight and average over the batch.
    weighted_loss = per_sample_loss * tf.cast(
        importance_weights, per_sample_loss.dtype)  # [B]
    loss = tf.reduce_mean(weighted_loss, axis=[0])  # []

  # Gradient step with global-norm clipping.
  trainable = self._network.trainable_variables
  gradients = tape.gradient(loss, trainable)
  gradients, _ = tf.clip_by_global_norm(gradients, self._max_gradient_norm)
  self._optimizer.apply(gradients, self._network.trainable_variables)

  # Write the new priorities (absolute TD errors) back to replay.
  if self._replay_client:
    priorities = tf.cast(tf.abs(extra.td_error), tf.float64)
    self._replay_client.update_priorities(
        table=adders.DEFAULT_PRIORITY_TABLE, keys=keys, priorities=priorities)

  # Periodically copy the online network into the target network.
  if tf.math.mod(self._num_steps, self._target_update_period) == 0:
    for src, dest in zip(self._network.variables,
                         self._target_network.variables):
      dest.assign(src)
  self._num_steps.assign_add(1)

  # Loss & statistics for logging.
  return {
      'loss': loss,
  }
def loss(self, i, x):
    """Return the mean of the squared error (x - self.a)^2 clipped to [0, 10].

    The argument `i` is accepted but not used.
    """
    squared_error = tf.square(x - self.a)
    bounded_error = tf.clip_by_value(squared_error, 0, 10)
    return tf.reduce_mean(bounded_error)