def compute_loss(self, y_true, y_pred):
    batch_size = tf.shape(y_true)[0]
    h = tf.shape(y_true)[1]
    w = tf.shape(y_true)[2]
    n_chans = tf.shape(y_true)[3]
    n_pixels = h * w
    n_dims = 5
    eps = 1e-8

    # indices in batch, row, column format
    #y_pred_norm, center_x, center_y = self.compute_center_coords(y_pred)
    y_pred_norm, center_point_xyrgb = self.compute_center_coords(y_true, y_pred)
    center_point_xyrgb = tf.tile(
        tf.reshape(center_point_xyrgb, [batch_size, 1, n_dims]),
        (1, h * w, 1))

    #center_x = tf.reshape(center_x, [batch_size])
    #center_y = tf.reshape(center_y, [batch_size])
    # make a batch_size x 3 matrix so we can index into the batch, r, c dimensions
    #center_rgb = tf.gather_nd(y_true, center_point_bxy)  # should be batch_size x 3

    true_rgbs = tf.reshape(y_true, [batch_size, n_pixels, n_chans])
    im_coords = tf.concat([
        tf.cast(tf.tile(self.xs, [batch_size, 1, 1]), tf.float32),
        tf.cast(tf.tile(self.ys, [batch_size, 1, 1]), tf.float32),
        true_rgbs
    ], axis=-1)

    # compute normalized distance, and weight using lambdas
    pixel_dists = ((im_coords - center_point_xyrgb) * self.lambdas_norm) ** 2 * self.lambdas
    soft_pixel_affinities = (1. - tf.exp(tf.reduce_sum(-0.5 * pixel_dists / self.sigma_norm ** 2, axis=-1)))
    soft_pixel_affinities = tf.reshape(soft_pixel_affinities, [batch_size, h, w])

    # weight mask
    return soft_pixel_affinities * y_pred_norm
def build_predict(self, Xnew, full_cov=False):
    """
    Xnew is a data matrix, point at which we want to predict

    This method computes p(F* | Y) where F* are points on the GP at Xnew,
    Y are noisy observations at X.
    """
    Kx = self.kern.K(self.X, Xnew)
    K = self.kern.K(self.X) + eye(self.num_data) * self.likelihood.variance
    L = tf.cholesky(K)
    A = tf.matrix_triangular_solve(L, Kx, lower=True)
    V = tf.matrix_triangular_solve(L, self.Y - self.mean_function(self.X))
    fmean = tf.matmul(tf.transpose(A), V) + self.mean_function(Xnew)
    if full_cov:
        fvar = self.kern.K(Xnew) - tf.matmul(tf.transpose(A), A)
        shape = tf.pack([1, 1, tf.shape(self.Y)[1]])
        fvar = tf.tile(tf.expand_dims(fvar, 2), shape)
    else:
        fvar = self.kern.Kdiag(Xnew) - tf.reduce_sum(tf.square(A), 0)
        fvar = tf.tile(tf.reshape(fvar, (-1, 1)), [1, self.Y.shape[1]])
    return fmean, fvar
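# A note on the algebra above (standard GP regression identities, stated here
# for reference rather than taken from the original source): with L the
# Cholesky factor of K + sigma^2 * I,
#     A = L^{-1} Kx        and        V = L^{-1} (Y - m(X)),
# so that
#     fmean = A^T V + m(Xnew) = Kx^T (K + sigma^2 I)^{-1} (Y - m(X)) + m(Xnew)
#     A^T A = Kx^T (K + sigma^2 I)^{-1} Kx   (the variance reduction term),
# i.e. the code computes the exact GP posterior without forming a matrix inverse.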
def bond_conv_layer(activated_atoms, bv_params, layer):
    flow_depth = flow_layer_depths[layer]

    next_activated_atoms = tf.zeros(tf.pack([N_atoms_ph, flow_depth]))

    for deg in range(1, 6):
        indices = tf.sub(deg_list_ph, tf.constant(1, dtype=tf.int32))
        flow_param = bv_params['A_flow'+str(layer)+'_'+str(deg)]
        flow_map = tf.gather(flow_param, type_adj_ph)

        multiples = tf.pack([N_atoms_ph, 1, 1])
        activated_atoms_dim = tf.expand_dims(tf.tile(tf.expand_dims(activated_atoms, 0), multiples), 2)

        adj_mul = tf.batch_matmul(activated_atoms_dim, flow_map)
        adj_mul = tf.squeeze(adj_mul, [2])

        deg_mask = tf.to_float(tf.equal(deg_list_ph, deg))
        multiples = tf.pack([1, N_atoms_ph, flow_depth])
        deg_list_dim = tf.tile(tf.expand_dims(tf.expand_dims(deg_mask, 1), 1), multiples)

        multiples = tf.pack([N_atoms_ph, N_atoms_ph, 1])
        biases = tf.tile(bv_params['b_flow'+str(layer)+'_'+str(deg)], multiples)
        filtered_atoms = tf.add(tf.mul(adj_mul, deg_list_dim), biases)

        next_activated_atoms = next_activated_atoms + tf.reduce_sum(filtered_atoms, 1)

    next_activated_atoms = tf.nn.relu(next_activated_atoms)
    return next_activated_atoms
def tf_compute_distances(points, start_centers):
    """
    Given a set of points and some centroids, computes the distance from each
    point to each centroid.

    :param points: a 2d TF tensor of shape num_points x dim
    :param start_centers: a numpy array of shape num_centroids x dim
    :return: a TF tensor of shape num_points x num_centroids
    """
    with tf.variable_scope("distances"):
        # The dimensions in the problem
        (num_centroids, _) = np.shape(start_centers)
        # The shape of the block is extracted as a TF variable.
        num_points = tf.shape(points)[0]
        # The centers are embedded in the TF program.
        centers = tf.constant(start_centers)
        # Computation of the minimum distance. This is a standard implementation that follows
        # what MLlib does.
        squares = tf.reduce_sum(tf.square(points), reduction_indices=1)
        center_squares = tf.reduce_sum(tf.square(centers), reduction_indices=1)
        prods = tf.matmul(points, centers, transpose_b=True)
        # This code simply expresses two outer products: center_squares * ones(num_points)
        # and ones(num_centroids) * squares
        t1a = tf.expand_dims(center_squares, 0)
        t1b = tf.stack([num_points, 1])
        t1 = tf.tile(t1a, t1b)
        t2a = tf.expand_dims(squares, 1)
        t2b = tf.stack([1, num_centroids])
        t2 = tf.tile(t2a, t2b)
        distances = t1 + t2 - 2 * prods
        return distances
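# A minimal check of the expansion used above (TF 1.x graph mode; the data
# here is made up for illustration): ||p - c||^2 = ||p||^2 + ||c||^2 - 2<p, c>.
import numpy as np
import tensorflow as tf

pts = np.array([[0., 0.], [3., 4.]])
ctrs = np.array([[0., 0.], [6., 8.]])
dist_op = tf_compute_distances(tf.constant(pts), ctrs)
with tf.Session() as sess:
    print(sess.run(dist_op))  # [[0., 100.], [25., 25.]]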
def encode_coordinates_alt(self, net):
    """An alternative implementation for the encoding coordinates.

    Args:
      net: a tensor of shape=[batch_size, height, width, num_features]

    Returns:
      a list of tensors with encoded image coordinates in them.
    """
    batch_size, h, w, _ = net.shape.as_list()
    h_loc = [
        tf.tile(
            tf.reshape(
                tf.contrib.layers.one_hot_encoding(
                    tf.constant([i]), num_classes=h), [h, 1]), [1, w])
        for i in xrange(h)
    ]
    h_loc = tf.concat([tf.expand_dims(t, 2) for t in h_loc], 2)
    w_loc = [
        tf.tile(
            tf.contrib.layers.one_hot_encoding(tf.constant([i]), num_classes=w),
            [h, 1]) for i in xrange(w)
    ]
    w_loc = tf.concat([tf.expand_dims(t, 2) for t in w_loc], 2)
    loc = tf.concat([h_loc, w_loc], 2)
    loc = tf.tile(tf.expand_dims(loc, 0), [batch_size, 1, 1, 1])
    return tf.concat([net, loc], 3)
def knn_point(k, xyz1, xyz2):
    '''
    Input:
        k: int32, number of k in k-nn search
        xyz1: (batch_size, ndataset, c) float32 array, input points
        xyz2: (batch_size, npoint, c) float32 array, query points
    Output:
        val: (batch_size, npoint, k) float32 array, L2 distances
        idx: (batch_size, npoint, k) int32 array, indices to input points
    '''
    b = xyz1.get_shape()[0].value
    n = xyz1.get_shape()[1].value
    c = xyz1.get_shape()[2].value
    m = xyz2.get_shape()[1].value
    print b, n, c, m
    print xyz1, (b, 1, n, c)
    xyz1 = tf.tile(tf.reshape(xyz1, (b, 1, n, c)), [1, m, 1, 1])
    xyz2 = tf.tile(tf.reshape(xyz2, (b, m, 1, c)), [1, 1, n, 1])
    dist = tf.reduce_sum((xyz1 - xyz2) ** 2, -1)
    print dist, k
    outi, out = select_top_k(k, dist)
    idx = tf.slice(outi, [0, 0, 0], [-1, -1, k])
    val = tf.slice(out, [0, 0, 0], [-1, -1, k])
    print idx, val
    #val, idx = tf.nn.top_k(-dist, k=k)  # ONLY SUPPORT CPU
    return val, idx
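# A self-contained fallback sketch using tf.nn.top_k, the alternative hinted
# at in the commented-out line above (which the original marks as CPU-only).
# Shapes follow the docstring of knn_point; this is a labeled re-implementation,
# not the original select_top_k path.
import tensorflow as tf

def knn_point_topk(k, xyz1, xyz2):
    # pairwise squared L2 distances, shape (batch_size, npoint, ndataset)
    diff = tf.expand_dims(xyz2, 2) - tf.expand_dims(xyz1, 1)
    dist = tf.reduce_sum(diff ** 2, axis=-1)
    # nearest neighbours are the largest negated distances
    neg_val, idx = tf.nn.top_k(-dist, k=k)
    return -neg_val, idx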
def testShapeFunctionEdgeCases(self):
    # Unknown multiples shape.
    inp = tf.constant(0.0, shape=[4, 4, 4, 4])
    tiled = tf.tile(inp, tf.placeholder(tf.int32))
    self.assertEqual([None, None, None, None], tiled.get_shape().as_list())

    # Unknown input shape.
    inp = tf.placeholder(tf.float32)
    tiled = tf.tile(inp, [2, 2, 2, 2])
    self.assertEqual([None, None, None, None], tiled.get_shape().as_list())

    # Unknown input and multiples shape.
    inp = tf.placeholder(tf.float32)
    tiled = tf.tile(inp, tf.placeholder(tf.int32))
    self.assertIs(None, tiled.get_shape().ndims)

    # Known input and partially known multiples.
    inp = tf.constant(0.0, shape=[1, 1])
    tiled = tf.tile(inp, [tf.placeholder(tf.int32), 7])
    self.assertEqual([None, 7], tiled.get_shape().as_list())

    # Mismatched input rank and multiples length.
    inp = tf.placeholder(tf.float32, shape=[None, None])
    with self.assertRaises(ValueError):
        tiled = tf.tile(inp, tf.placeholder(tf.int32, shape=[3]))
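# Concrete tf.tile semantics behind the shape inference tested above (values
# chosen for illustration): tf.tile([[1, 2], [3, 4]], [2, 3]) repeats the
# input twice along rows and three times along columns, yielding a [4, 6]
# tensor. Output dimension i is always input_dim[i] * multiples[i], and the
# multiples vector must have exactly one entry per input dimension, which is
# the rank check exercised by the final assertRaises case.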
def _build_predict(self, Xnew, full_cov=False):
    """
    Compute the mean and variance of the latent function at some new points
    Xnew. For a derivation of the terms in here, see the associated SGPR
    notebook.
    """
    num_inducing = len(self.feature)
    err = self.Y - self.mean_function(self.X)
    Kuf = self.feature.Kuf(self.kern, self.X)
    Kuu = self.feature.Kuu(self.kern, jitter=settings.numerics.jitter_level)
    Kus = self.feature.Kuf(self.kern, Xnew)
    sigma = tf.sqrt(self.likelihood.variance)
    L = tf.cholesky(Kuu)
    A = tf.matrix_triangular_solve(L, Kuf, lower=True) / sigma
    B = tf.matmul(A, A, transpose_b=True) + tf.eye(num_inducing, dtype=settings.float_type)
    LB = tf.cholesky(B)
    Aerr = tf.matmul(A, err)
    c = tf.matrix_triangular_solve(LB, Aerr, lower=True) / sigma
    tmp1 = tf.matrix_triangular_solve(L, Kus, lower=True)
    tmp2 = tf.matrix_triangular_solve(LB, tmp1, lower=True)
    mean = tf.matmul(tmp2, c, transpose_a=True)
    if full_cov:
        var = self.kern.K(Xnew) + tf.matmul(tmp2, tmp2, transpose_a=True) \
              - tf.matmul(tmp1, tmp1, transpose_a=True)
        shape = tf.stack([1, 1, tf.shape(self.Y)[1]])
        var = tf.tile(tf.expand_dims(var, 2), shape)
    else:
        var = self.kern.Kdiag(Xnew) + tf.reduce_sum(tf.square(tmp2), 0) \
              - tf.reduce_sum(tf.square(tmp1), 0)
        shape = tf.stack([1, tf.shape(self.Y)[1]])
        var = tf.tile(tf.expand_dims(var, 1), shape)
    return mean + self.mean_function(Xnew), var
def routing(input, b_IJ):
    ''' The routing algorithm.

    Args:
        input: A Tensor with [batch_size, num_caps_l=1152, 1, length(u_i)=8, 1]
               shape, num_caps_l meaning the number of capsule in the layer l.
    Returns:
        A Tensor of shape [batch_size, num_caps_l_plus_1, length(v_j)=16, 1]
        representing the vector output `v_j` in the layer l+1
    Notes:
        u_i represents the vector output of capsule i in the layer l, and
        v_j the vector output of capsule j in the layer l+1.
    '''

    # W: [num_caps_j, num_caps_i, len_u_i, len_v_j]
    W = tf.get_variable('Weight', shape=(1, 1152, 10, 8, 16), dtype=tf.float32,
                        initializer=tf.random_normal_initializer(stddev=cfg.stddev))

    # Eq.2, calc u_hat
    # do tiling for input and W before matmul
    # input => [batch_size, 1152, 10, 8, 1]
    # W => [batch_size, 1152, 10, 8, 16]
    input = tf.tile(input, [1, 1, 10, 1, 1])
    W = tf.tile(W, [cfg.batch_size, 1, 1, 1, 1])
    assert input.get_shape() == [cfg.batch_size, 1152, 10, 8, 1]

    # in last 2 dims:
    # [8, 16].T x [8, 1] => [16, 1] => [batch_size, 1152, 10, 16, 1]
    u_hat = tf.matmul(W, input, transpose_a=True)
    assert u_hat.get_shape() == [cfg.batch_size, 1152, 10, 16, 1]

    # line 3, for r iterations do
    for r_iter in range(cfg.iter_routing):
        with tf.variable_scope('iter_' + str(r_iter)):
            # line 4:
            # => [1, 1152, 10, 1, 1]
            c_IJ = tf.nn.softmax(b_IJ, dim=2)
            c_IJ = tf.tile(c_IJ, [cfg.batch_size, 1, 1, 1, 1])
            assert c_IJ.get_shape() == [cfg.batch_size, 1152, 10, 1, 1]

            # line 5:
            # weighting u_hat with c_IJ, element-wise in the last two dims
            # => [batch_size, 1152, 10, 16, 1]
            s_J = tf.multiply(c_IJ, u_hat)
            # then sum in the second dim, resulting in [batch_size, 1, 10, 16, 1]
            s_J = tf.reduce_sum(s_J, axis=1, keep_dims=True)
            assert s_J.get_shape() == [cfg.batch_size, 1, 10, 16, 1]

            # line 6:
            # squash using Eq.1,
            v_J = squash(s_J)
            assert v_J.get_shape() == [cfg.batch_size, 1, 10, 16, 1]

            # line 7:
            # tile v_J from [batch_size, 1, 10, 16, 1] to [batch_size, 1152, 10, 16, 1],
            # then matmul in the last two dims: [16, 1].T x [16, 1] => [1, 1], and
            # reduce sum in the batch_size dim, resulting in [1, 1152, 10, 1, 1]
            v_J_tiled = tf.tile(v_J, [1, 1152, 1, 1, 1])
            u_produce_v = tf.matmul(u_hat, v_J_tiled, transpose_a=True)
            assert u_produce_v.get_shape() == [cfg.batch_size, 1152, 10, 1, 1]
            b_IJ += tf.reduce_sum(u_produce_v, axis=0, keep_dims=True)

    # return the final capsule outputs (the docstring above promises v_j)
    return v_J
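# `squash` is called above but not defined in this snippet. A standard sketch
# of Eq. 1 from Sabour et al. (2017), squashing along the capsule-vector axis
# (axis=-2 here, since the vectors have shape [..., 16, 1]); the original repo
# defines its own version:
def squash(s, axis=-2, epsilon=1e-9):
    squared_norm = tf.reduce_sum(tf.square(s), axis=axis, keep_dims=True)
    scale = squared_norm / (1. + squared_norm) / tf.sqrt(squared_norm + epsilon)
    return scale * s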
def w(input_data, cu, kappas_t_1, config):
    batch_size = config.batch_size
    mixture_size = config.mixture_size
    vocab_length = config.vocab_length

    # split along dim of mixture size * 3
    hat_alphas_t, hat_betas_t, hat_kappas_t = tf.split(1, 3, input_data)

    alphas_t = tf.exp(hat_alphas_t)
    betas_t = tf.exp(hat_betas_t)
    kappas_t = tf.add(kappas_t_1, tf.exp(hat_kappas_t))

    speech_length = tf.shape(cu)[1]

    u = tf.linspace(1.0, tf.cast(speech_length, tf.float32), speech_length)
    u = tf.expand_dims(u, 0)
    u = tf.expand_dims(u, 0)
    u = tf.tile(u, [batch_size, mixture_size, 1])

    alphas_t_expanded = tf.tile(tf.expand_dims(alphas_t, -1), [1, 1, speech_length])
    betas_t_expanded = tf.tile(tf.expand_dims(betas_t, -1), [1, 1, speech_length])
    kappas_t_expanded = tf.tile(tf.expand_dims(kappas_t, -1), [1, 1, speech_length])

    calc = tf.square(tf.sub(kappas_t_expanded, u))
    calc = tf.mul(calc, tf.neg(betas_t_expanded))
    calc = tf.exp(calc)
    calc = tf.mul(calc, alphas_t_expanded)

    phi_t = tf.expand_dims(tf.reduce_sum(calc, 1), 1)

    output = tf.squeeze(tf.batch_matmul(phi_t, cu), [1])

    return output, kappas_t, phi_t
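# For reference (hedged restatement of Graves, 2013, not from the original
# source): the window above computes
#     phi_t(u)  = sum_k alpha_t^k * exp(-beta_t^k * (kappa_t^k - u)^2)
#     output_t  = sum_u phi_t(u) * c_u
# with the attention centres kappa forced to move monotonically forward via
# kappa_t = kappa_{t-1} + exp(hat_kappa_t).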
def __init__(self, num_layers, num_units, batch_size, input_size, keep_prob=1.0):
    self.num_layers = num_layers
    self.grus = []
    self.inits = []
    self.dropout_mask = []
    for layer in range(num_layers):
        input_size_ = input_size if layer == 0 else 2 * num_units
        gru_fw = tf.nn.rnn_cell.MultiRNNCell([
            tf.contrib.cudnn_rnn.CudnnCompatibleGRUCell(num_units=num_units)])
        gru_bw = tf.nn.rnn_cell.MultiRNNCell([
            tf.contrib.cudnn_rnn.CudnnCompatibleGRUCell(num_units=num_units)])
        init_fw = tf.Variable(tf.zeros([num_units]))
        init_fw = tf.expand_dims(tf.tile(tf.expand_dims(init_fw, axis=0), [batch_size, 1]), axis=0)
        init_bw = tf.Variable(tf.zeros([num_units]))
        init_bw = tf.expand_dims(tf.tile(tf.expand_dims(init_bw, axis=0), [batch_size, 1]), axis=0)
        mask_fw = tf.nn.dropout(tf.ones([1, batch_size, input_size_], dtype=tf.float32),
                                keep_prob=keep_prob)
        mask_bw = tf.nn.dropout(tf.ones([1, batch_size, input_size_], dtype=tf.float32),
                                keep_prob=keep_prob)
        self.grus.append((gru_fw, gru_bw,))
        self.inits.append((init_fw, init_bw,))
        self.dropout_mask.append((mask_fw, mask_bw,))
def while_step(t, rnn_state, tas, accs):
    """Implements one timestep of FIVO computation."""
    log_weights_acc, log_p_hat_acc, kl_acc = accs
    cur_inputs, cur_mask = nested.read_tas([inputs_ta, mask_ta], t)
    # Run the cell for one step.
    log_q_z, log_p_z, log_p_x_given_z, kl, new_state = cell(
        cur_inputs, rnn_state, cur_mask,
    )
    # Compute the incremental weight and use it to update the current
    # accumulated weight.
    kl_acc += kl * cur_mask
    log_alpha = (log_p_x_given_z + log_p_z - log_q_z) * cur_mask
    log_alpha = tf.reshape(log_alpha, [num_samples, batch_size])
    log_weights_acc += log_alpha
    # Calculate the effective sample size.
    ess_num = 2 * tf.reduce_logsumexp(log_weights_acc, axis=0)
    ess_denom = tf.reduce_logsumexp(2 * log_weights_acc, axis=0)
    log_ess = ess_num - ess_denom
    # Calculate the ancestor indices via resampling. Because we maintain the
    # log unnormalized weights, we pass the weights in as logits, allowing
    # the distribution object to apply a softmax and normalize them.
    resampling_dist = tf.contrib.distributions.Categorical(
        logits=tf.transpose(log_weights_acc, perm=[1, 0]))
    ancestor_inds = tf.stop_gradient(
        resampling_dist.sample(sample_shape=num_samples, seed=random_seed))
    # Because the batch is flattened and laid out as discussed
    # above, we must modify ancestor_inds to index the proper samples.
    # The particles in the ith filter are distributed every batch_size rows
    # in the batch, and offset i rows from the top. So, to correct the indices
    # we multiply by the batch_size and add the proper offset. Crucially,
    # when ancestor_inds is flattened the layout of the batch is maintained.
    offset = tf.expand_dims(tf.range(batch_size), 0)
    ancestor_inds = tf.reshape(ancestor_inds * batch_size + offset, [-1])
    noresample_inds = tf.range(num_samples * batch_size)
    # Decide whether or not we should resample; don't resample if we are past
    # the end of a sequence.
    should_resample = resampling_criterion(num_samples, log_ess, t)
    should_resample = tf.logical_and(should_resample,
                                     cur_mask[:batch_size] > 0.)
    float_should_resample = tf.to_float(should_resample)
    ancestor_inds = tf.where(
        tf.tile(should_resample, [num_samples]),
        ancestor_inds,
        noresample_inds)
    new_state = nested.gather_tensors(new_state, ancestor_inds)
    # Update the TensorArrays before we reset the weights so that we capture
    # the incremental weights and not zeros.
    ta_updates = [log_weights_acc, log_ess, float_should_resample]
    new_tas = [ta.write(t, x) for ta, x in zip(tas, ta_updates)]
    # For the particle filters that resampled, update log_p_hat and
    # reset weights to zero.
    log_p_hat_update = tf.reduce_logsumexp(
        log_weights_acc, axis=0) - tf.log(tf.to_float(num_samples))
    log_p_hat_acc += log_p_hat_update * float_should_resample
    log_weights_acc *= (1. - tf.tile(float_should_resample[tf.newaxis, :],
                                     [num_samples, 1]))
    new_accs = (log_weights_acc, log_p_hat_acc, kl_acc)
    return t + 1, new_state, new_tas, new_accs
def ae_latent_sample_beam(latents_dense_in, inputs, ed, embed, hparams):
    """Sample from the latent space in the autoencoder."""
    vocab_size = 2**hparams.z_size
    beam_size = 1  # TODO(lukaszkaiser): larger beam sizes seem to work bad.
    inputs = tf.tile(inputs, [beam_size, 1, 1])
    ed = tf.tile(ed, [beam_size, 1, 1, 1])

    def symbols_to_logits_fn(ids):
        """Go from ids to logits."""
        ids = tf.expand_dims(ids, axis=2)  # Ids start with added all-zeros.
        latents_discrete = tf.pad(ids[:, 1:], [[0, 0], [0, 1], [0, 0]])

        with tf.variable_scope(tf.get_variable_scope(), reuse=False):
            latents_dense = embed(latents_discrete)
            latents_pred = decode_transformer(
                inputs, ed, latents_dense, hparams, "extra")
            logits = tf.layers.dense(latents_pred, vocab_size, name="extra_logits")
        current_output_position = common_layers.shape_list(ids)[1] - 1
        logits = logits[:, current_output_position, :, :]
        return tf.squeeze(logits, axis=[1])

    initial_ids = tf.zeros([tf.shape(latents_dense_in)[0]], dtype=tf.int32)
    length = tf.shape(latents_dense_in)[1]
    ids, _ = beam_search.beam_search(
        symbols_to_logits_fn, initial_ids, beam_size, length,
        vocab_size, alpha=0.0, eos_id=-1, stop_early=False)

    res = tf.expand_dims(ids[:, 0, :], axis=2)  # Pick first beam.
    return res[:, 1:]  # Remove the added all-zeros from ids.
def tf_format_mnist_images(X, Y, Y_, n=100, lines=10):
    correct_prediction = tf.equal(tf.argmax(Y, 1), tf.argmax(Y_, 1))
    correctly_recognised_indices = tf.squeeze(tf.where(correct_prediction), [1])  # indices of correctly recognised images
    incorrectly_recognised_indices = tf.squeeze(tf.where(tf.logical_not(correct_prediction)), [1])  # indices of incorrectly recognised images
    everything_incorrect_first = tf.concat([incorrectly_recognised_indices, correctly_recognised_indices], 0)  # images reordered with indices of unrecognised images first
    everything_incorrect_first = tf.slice(everything_incorrect_first, [0], [n])  # compute first 100 only - no space to display more anyway
    # compute n=100 digits to display only
    Xs = tf.gather(X, everything_incorrect_first)
    Ys = tf.gather(Y, everything_incorrect_first)
    Ys_ = tf.gather(Y_, everything_incorrect_first)
    correct_prediction_s = tf.gather(correct_prediction, everything_incorrect_first)

    digits_left = tf.image.grayscale_to_rgb(tensorflowvisu_digits.digits_left())
    correct_tags = tf.gather(digits_left, tf.argmax(Ys_, 1))  # correct digits to be printed on the images
    digits_right = tf.image.grayscale_to_rgb(tensorflowvisu_digits.digits_right())
    computed_tags = tf.gather(digits_right, tf.argmax(Ys, 1))  # computed digits to be printed on the images
    #superimposed_digits = correct_tags+computed_tags
    superimposed_digits = tf.where(correct_prediction_s, tf.zeros_like(correct_tags), correct_tags + computed_tags)  # only print the correct and computed digits on unrecognised images
    correct_bkg = tf.reshape(tf.tile([1.3, 1.3, 1.3], [28 * 28]), [1, 28, 28, 3])  # white background
    incorrect_bkg = tf.reshape(tf.tile([1.3, 1.0, 1.0], [28 * 28]), [1, 28, 28, 3])  # red background
    recognised_bkg = tf.gather(tf.concat([incorrect_bkg, correct_bkg], 0), tf.cast(correct_prediction_s, tf.int32))  # pick either the red or the white background depending on recognised status

    I = tf.image.grayscale_to_rgb(Xs)
    I = ((1 - (I + superimposed_digits)) * recognised_bkg) / 1.3  # stencil extra data on top of images and reorder them unrecognised first
    I = tf.image.convert_image_dtype(I, tf.uint8, saturate=True)
    Islices = []  # 100 images => 10x10 image block
    for imslice in range(lines):
        Islices.append(tf.concat(tf.unstack(tf.slice(I, [imslice * n // lines, 0, 0, 0], [n // lines, 28, 28, 3])), 1))
    I = tf.concat(Islices, 0)
    return I
def fztloss(f, pVecs, nVecs):
    """
    Tensorized cost function from Fast Zero-Shot Learning paper

    Args:
        f: The output from the network, a tensor of shape (# images, word embedding size)
        pVecs: The vector embeddings of the ground truth tags, a tensor
            of shape (# images, # positive tags, word embedding size)
        nVecs: The vector embeddings of negatively sampled tags, a tensor
            of shape (# images, # negative samples, word embedding size)

    Returns:
        Scalar tensor representing the batch cost
    """
    posmul = tf.mul(pVecs, f)
    negmul = tf.mul(nVecs, f)

    tfpos = tf.reduce_sum(posmul, reduction_indices=2)
    tfneg = tf.reduce_sum(negmul, reduction_indices=2)

    tfpos = tf.transpose(tfpos, [1, 0])
    tfneg = tf.transpose(tfneg, [1, 0])

    negexpan = tf.tile(tf.expand_dims(tfneg, -1), [1, 1, tf.shape(tfpos)[1]])
    posexpan = tf.tile(tf.transpose(tf.expand_dims(tfpos, -1), [0, 2, 1]),
                       [1, tf.shape(tfneg)[1], 1])
    differences = tf.sub(negexpan, posexpan)

    return tf.reduce_sum(tf.reduce_sum(tf.log(1 + tf.exp(differences)),
                                       reduction_indices=[1, 2]))
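# Hedged restatement of the loss above: writing p = <f, pVec> for each
# positive tag and n = <f, nVec> for each sampled negative, the tiling pairs
# negative scores against positive scores and the result is
#     sum over pairs of log(1 + exp(n - p)),
# a softplus ranking loss that pushes each positive tag's score above each
# negative's.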
def compute_max_or_min(self, select, maxi=True):
    # computes the argmax and argmin of a column with probabilistic row selection
    answer = tf.zeros([
        self.batch_size, self.num_cols + self.num_word_cols, self.max_elements
    ], self.data_type)
    sum_prob = tf.zeros([self.batch_size, self.num_cols + self.num_word_cols],
                        self.data_type)
    for j in range(self.max_elements):
        if (maxi):
            curr_pos = j
        else:
            curr_pos = self.max_elements - 1 - j
        select_index = tf.slice(self.full_processed_sorted_index_column,
                                [0, 0, curr_pos], [self.batch_size, -1, 1])
        select_mask = tf.equal(
            tf.tile(
                tf.expand_dims(
                    tf.tile(
                        tf.expand_dims(tf.range(self.max_elements), 0),
                        [self.batch_size, 1]), 1),
                [1, self.num_cols + self.num_word_cols, 1]), select_index)
        curr_prob = tf.expand_dims(select, 1) * tf.cast(
            select_mask, self.data_type) * self.select_bad_number_mask
        curr_prob = curr_prob * tf.expand_dims((1 - sum_prob), 2)
        curr_prob = curr_prob * tf.expand_dims(
            tf.cast((1 - sum_prob) > 0.0, self.data_type), 2)
        answer = tf.where(select_mask, curr_prob, answer)
        sum_prob += tf.reduce_sum(curr_prob, 2)
    return answer
def build_predict(self, Xnew, full_cov=False):
    """
    Compute the mean and variance of the latent function at some new points
    Xnew. For a derivation of the terms in here, see the associated SGPR
    notebook.
    """
    num_inducing = tf.shape(self.Z)[0]
    err = self.Y - self.mean_function(self.X)
    Kuf = self.kern.K(self.Z, self.X)
    Kuu = self.kern.K(self.Z) + eye(num_inducing) * 1e-6
    Kus = self.kern.K(self.Z, Xnew)
    sigma = tf.sqrt(self.likelihood.variance)
    L = tf.cholesky(Kuu)
    A = tf.matrix_triangular_solve(L, Kuf, lower=True) / sigma
    B = tf.matmul(A, tf.transpose(A)) + eye(num_inducing)
    LB = tf.cholesky(B)
    Aerr = tf.matmul(A, err)
    c = tf.matrix_triangular_solve(LB, Aerr, lower=True) / sigma
    tmp1 = tf.matrix_triangular_solve(L, Kus, lower=True)
    tmp2 = tf.matrix_triangular_solve(LB, tmp1, lower=True)
    mean = tf.matmul(tf.transpose(tmp2), c)
    if full_cov:
        var = self.kern.K(Xnew) + tf.matmul(tf.transpose(tmp2), tmp2) \
            - tf.matmul(tf.transpose(tmp1), tmp1)
        shape = tf.pack([1, 1, tf.shape(self.Y)[1]])
        var = tf.tile(tf.expand_dims(var, 2), shape)
    else:
        var = self.kern.Kdiag(Xnew) + tf.reduce_sum(tf.square(tmp2), 0) \
            - tf.reduce_sum(tf.square(tmp1), 0)
        shape = tf.pack([1, tf.shape(self.Y)[1]])
        var = tf.tile(tf.expand_dims(var, 1), shape)
    return mean + self.mean_function(Xnew), var
def get_locs_cost(locs, mean, reward, base):
    mean_d = tf.tile(tf.expand_dims(mean, 1), [1, n_sample, 1])
    tiled_base = tf.cast(tf.tile(tf.expand_dims(tf.expand_dims(base, 0), 0),
                                 [batch_size, n_sample]), tf.float32)
    print "TILED BASE SHAPE: ", tiled_base.get_shape(), tiled_base.dtype
    red_reward = reward - tiled_base
    reward_cost_arr = tf.mul(tf.reduce_sum(0.5 * tf.square((locs - mean_d) / stddev), 2),
                             red_reward)
    return tf.div(tf.reduce_sum(reward_cost_arr), (n_sample * batch_size))
def compute_ans(op_embedding, comparison):
    op_embedding = tf.expand_dims(op_embedding, 0)
    # dot product of operation embedding with hidden state to the left of the
    # number occurrence
    first = tf.transpose(
        tf.matmul(op_embedding,
                  tf.transpose(
                      tf.reduce_sum(hidden_vectors * tf.tile(
                          tf.expand_dims(
                              tf.transpose(self.batch_ordinal_question), 2),
                          [1, 1, self.utility.FLAGS.embedding_dims]), 0))))
    second = self.batch_question_number_one_mask + tf.transpose(
        tf.matmul(op_embedding,
                  tf.transpose(
                      tf.reduce_sum(hidden_vectors * tf.tile(
                          tf.expand_dims(
                              tf.transpose(self.batch_ordinal_question_one), 2),
                          [1, 1, self.utility.FLAGS.embedding_dims]), 0))))
    question_number_softmax = tf.nn.softmax(tf.concat(axis=1, values=[first, second]))
    if (self.mode == "test"):
        cond = tf.equal(question_number_softmax,
                        tf.reshape(
                            tf.reduce_max(question_number_softmax, 1),
                            [self.batch_size, 1]))
        question_number_softmax = tf.where(
            cond, tf.fill(tf.shape(question_number_softmax), 1.0),
            tf.fill(tf.shape(question_number_softmax), 0.0))
        question_number_softmax = tf.cast(question_number_softmax, self.data_type)
    ans = tf.reshape(
        tf.reduce_sum(question_number_softmax * tf.concat(
            axis=1,
            values=[self.batch_question_number, self.batch_question_number_one]), 1),
        [self.batch_size, 1])
    return ans
def _meshgrid(depth, height, width, z_near, z_far):
    with tf.variable_scope('_meshgrid'):
        x_t = tf.reshape(
            tf.tile(tf.linspace(-1.0, 1.0, width), [height * depth]),
            [depth, height, width])
        y_t = tf.reshape(
            tf.tile(tf.linspace(-1.0, 1.0, height), [width * depth]),
            [depth, width, height])
        y_t = tf.transpose(y_t, [0, 2, 1])
        sample_grid = tf.tile(
            tf.linspace(float(z_near), float(z_far), depth), [width * height])
        z_t = tf.reshape(sample_grid, [height, width, depth])
        z_t = tf.transpose(z_t, [2, 0, 1])

        z_t = 1 / z_t
        d_t = 1 / z_t
        x_t /= z_t
        y_t /= z_t

        x_t_flat = tf.reshape(x_t, (1, -1))
        y_t_flat = tf.reshape(y_t, (1, -1))
        d_t_flat = tf.reshape(d_t, (1, -1))

        ones = tf.ones_like(x_t_flat)
        grid = tf.concat([d_t_flat, y_t_flat, x_t_flat, ones], 0)
    return grid
def __init__(self, config):
    self.inputs = [ev.placeholder(config) for ev in config.evidence]
    exists = [ev.exists(i) for ev, i in zip(config.evidence, self.inputs)]
    zeros = tf.zeros([config.batch_size, config.latent_size], dtype=tf.float32)

    # Compute the denominator used for mean and covariance
    for ev in config.evidence:
        ev.init_sigma(config)
    d = [tf.where(exist,
                  tf.tile([1. / tf.square(ev.sigma)], [config.batch_size]),
                  tf.zeros(config.batch_size))
         for ev, exist in zip(config.evidence, exists)]
    d = 1. + tf.reduce_sum(tf.stack(d), axis=0)
    denom = tf.tile(tf.reshape(d, [-1, 1]), [1, config.latent_size])

    # Compute the mean of Psi
    with tf.variable_scope('mean'):
        # 1. compute encoding
        self.encodings = [ev.encode(i, config) for ev, i in
                          zip(config.evidence, self.inputs)]
        encodings = [encoding / tf.square(ev.sigma) for ev, encoding in
                     zip(config.evidence, self.encodings)]

        # 2. pick only encodings from valid inputs that exist, otherwise pick zero encoding
        encodings = [tf.where(exist, enc, zeros) for exist, enc in
                     zip(exists, encodings)]

        # 3. tile the encodings according to each evidence type
        encodings = [[enc] * ev.tile for ev, enc in zip(config.evidence, encodings)]
        encodings = tf.stack(list(chain.from_iterable(encodings)))

        # 4. compute the mean of non-zero encodings
        self.psi_mean = tf.reduce_sum(encodings, axis=0) / denom

    # Compute the covariance of Psi
    with tf.variable_scope('covariance'):
        I = tf.ones([config.batch_size, config.latent_size], dtype=tf.float32)
        self.psi_covariance = I / denom
def q_zt(self, unused_observation, prev_state, t):
    batch_size = tf.shape(prev_state)[0]
    q_mu = tf.tile(self.mus[t][tf.newaxis, :], [batch_size, 1])
    q_sigma = tf.maximum(tf.nn.softplus(self.sigmas[t]), self.sigma_min)
    q_sigma = tf.tile(q_sigma[tf.newaxis, :], [batch_size, 1])
    q_zt = tf.contrib.distributions.Normal(loc=q_mu, scale=tf.sqrt(q_sigma))
    return q_zt
def build_loss(self, logits, labels, lambs):
    # apply the sig function to logits and then transpose
    logits = tf.transpose(framwork.sig_func(logits))
    # according to the labels, erase rows which are not in labels
    labels_unique = tf.constant(range(self.image_classes), dtype=tf.int32)
    labels_num = self.image_classes
    logits = tf.gather(logits, indices=labels_unique)
    lambs = tf.gather(lambs, indices=labels_unique)
    # set the value of each row to True when it occurs in labels
    template = tf.tile(tf.expand_dims(labels_unique, dim=1), [1, self.batch_size])
    labels_expand = tf.tile(tf.expand_dims(labels, dim=0), [labels_num, 1])
    indict_logic = tf.equal(labels_expand, template)
    # split the tensor along rows
    logit_list = tf.split(0, labels_num, logits)
    indict_logic_list = tf.split(0, labels_num, indict_logic)
    lambda_list = tf.split(0, self.image_classes, lambs)
    # loss_list = list()
    # for i in range(self.image_classes):
    #     loss_list.append(framwork.loss_func(logit_list[i], indict_logic_list[i], lambda_list[i]))
    loss_list = map(framwork.loss_func, logit_list, indict_logic_list, lambda_list)
    loss = tf.add_n(loss_list)
    tensors_dict = {'labels_unique': labels_unique, 'template': template,
                    'logits_sig_trans': logits, 'loss': loss,
                    'indict_logic': indict_logic}
    self.tensors_names.extend(tensors_dict.keys())
    self.net_tensors.update(tensors_dict)
def build_predict(self, Xnew, full_cov=False):
    """
    Compute the mean and variance of the latent function at some new points
    Xnew.
    """
    _, _, Luu, L, _, _, gamma = self.build_common_terms()
    Kus = self.kern.K(self.Z, Xnew)  # size M x Xnew

    w = tf.matrix_triangular_solve(Luu, Kus, lower=True)  # size M x Xnew

    tmp = tf.matrix_triangular_solve(tf.transpose(L), gamma, lower=False)
    mean = tf.matmul(tf.transpose(w), tmp) + self.mean_function(Xnew)
    intermediateA = tf.matrix_triangular_solve(L, w, lower=True)

    if full_cov:
        var = (self.kern.K(Xnew) - tf.matmul(tf.transpose(w), w)
               + tf.matmul(tf.transpose(intermediateA), intermediateA))
        var = tf.tile(tf.expand_dims(var, 2), tf.pack([1, 1, tf.shape(self.Y)[1]]))
    else:
        var = (self.kern.Kdiag(Xnew) - tf.reduce_sum(tf.square(w), 0)
               + tf.reduce_sum(tf.square(intermediateA), 0))  # size Xnew,
        var = tf.tile(tf.expand_dims(var, 1), tf.pack([1, tf.shape(self.Y)[1]]))

    return mean, var
def build_network(self):
    net_tensors = self.net_tensors
    with self.net_graph.as_default(), tf.device(self.net_device):
        logits = tf.placeholder(dtype=tf.float32, shape=(self.batch_size, self.image_classes))
        labels = tf.placeholder(dtype=tf.int32, shape=(self.batch_size,))
        lambs = tf.placeholder(dtype=tf.float32, shape=(self.image_classes,))
        # apply the sig function to logits and then transpose
        logits = tf.transpose(framwork.sig_func(logits))
        # according to the labels, erase rows which are not in labels
        labels_unique = tf.constant(range(self.image_classes), dtype=tf.int32)
        labels_num = self.image_classes
        logits = tf.gather(logits, indices=labels_unique)
        lambs = tf.gather(lambs, indices=labels_unique)
        # set the value of each row to True when it occurs in labels
        template = tf.tile(tf.expand_dims(labels_unique, dim=1), [1, self.batch_size])
        labels_expand = tf.tile(tf.expand_dims(labels, dim=0), [labels_num, 1])
        indict_logic = tf.equal(labels_expand, template)
        # split the tensor along rows
        logit_list = tf.split(0, labels_num, logits)
        indict_logic_list = tf.split(0, labels_num, indict_logic)
        lamb_list = tf.split(0, self.image_classes, lambs)
        logit_list = [tf.squeeze(item) for item in logit_list]
        indict_logic_list = [tf.squeeze(item) for item in indict_logic_list]
        left_right_tuples = list()
        for i in range(self.image_classes):
            left_right_tuples.append(framwork.lamb_func(logit_list[i],
                                                        indict_logic_list[i],
                                                        lamb=lamb_list[i]))
        # func = framwork.lamb_func()
        # left_right_tuples = map(func, logit_list, indict_logic_list, lamb_list)
        net_tensors.update({'left_right_tuples': left_right_tuples, 'logits': logits,
                            'labels': labels, 'lambs': lambs})
def compute_attention(self, image, text):
    with tf.variable_scope("attention") as scope:
        if self.reuse:
            scope.reuse_variables()

        text_replicated = self._replicate_features(text, (1, 14, 14, 1),
                                                   project=self.project)

        # Now both the features from the resnet and lstm are concatenated along the depth axis
        features = tf.nn.dropout(tf.concat([image, text_replicated], axis=3),
                                 keep_prob=self.dropout_prob)
        conv1 = tf.nn.dropout(self.conv2d_layer(features, filters=512,
                                                kernel_size=(1, 1),
                                                name="attention_conv1"),
                              keep_prob=self.dropout_prob)
        conv2 = self.conv2d_layer(conv1, filters=2, kernel_size=(1, 1),
                                  name="attention_conv2")

        # Flattening each attention map to perform softmax
        attention_map = tf.reshape(conv2, (self.batch_size, 14 * 14, 2))
        attention_map = tf.nn.softmax(attention_map, axis=1, name="attention_map")

        image = tf.reshape(image, (self.batch_size, 196, 2048, 1))
        attention = tf.tile(tf.expand_dims(attention_map, 2), (1, 1, 2048, 1))
        image = tf.tile(image, (1, 1, 1, 2))

        weighted = image * attention
        weighted_average = tf.reduce_mean(weighted, 1)

        # Flatten both glimpses into a single vector
        weighted_average = tf.reshape(weighted_average, (self.batch_size, 2048 * 2))
        attention_output = tf.nn.dropout(tf.concat([weighted_average, text], 1),
                                         self.dropout_prob)
    return attention_output
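# Shape walkthrough for the two-glimpse weighting above (added for clarity;
# the numbers follow the hard-coded 14x14x2048 feature grid):
#   attention_map: [B, 196, 2]       -> expand/tile -> [B, 196, 2048, 2]
#   image:         [B, 196, 2048, 1] -> tile        -> [B, 196, 2048, 2]
# Each of the two glimpses reweights all 196 spatial positions; the mean over
# positions gives [B, 2048, 2], flattened to [B, 4096] before the dropout.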
def _tf_sample_generator(self):
    archit = self.network_architecture
    depth = len(archit) - 1
    # top layer is just the bias
    self.samp_prob1_tfhl_list = [tf.tile(self.transfer_fun(self.bias_list[depth]),
                                         [1, self.batch_size])]
    self.sample_tfhl_list = [sampleInt(self.samp_prob1_tfhl_list[0])]
    self.samp_w_tfhl_list = [tf.ones([1, self.batch_size])]
    sample_handle = [self.samp_var_list[depth].assign(self.sample_tfhl_list[0]),
                     self.samp_w_var_list[depth].assign(self.samp_w_tfhl_list[0]),
                     self.samp_prob1_var_list[depth].assign(self.samp_prob1_tfhl_list[0])]
    # sample from top to the bottom
    for i in range(depth - 1, -1, -1):  # not include top one
        n = archit[i]
        m = archit[i + 1]
        spb = self.transfer_fun(tf.matmul(self.weights_list[i], self.sample_tfhl_list[0]) +
                                tf.tile(self.bias_list[i], [1, self.batch_size]))
        # we need to save the prob of sample
        sp = sampleInt(spb)
        spb_assign_handle = self.samp_prob1_var_list[i].assign(spb)
        sp_assign_handle = self.samp_var_list[i].assign(sp)
        # compute_importance_weight(Hi+1, Hi, H_wi+1, W, b)
        spw = compute_importance_weight(self.sample_tfhl_list[0], sp,
                                        self.samp_w_tfhl_list[0],
                                        self.weights_list[i], self.bias_list[i],
                                        self.batch_size)
        spw_assign_handle = self.samp_w_var_list[i].assign(spw)
        sample_handle.extend([sp_assign_handle, spw_assign_handle, spb_assign_handle])
        self.samp_prob1_tfhl_list.insert(0, spb)
        self.sample_tfhl_list.insert(0, sp)
        self.samp_w_tfhl_list.insert(0, spw)
    return sample_handle
def call(self, inputs):
    # TODO: check input dtype
    # Tile kb_inputs
    kb_inputs = self.kb_inputs
    for i in range(inputs.shape.ndims - 1):
        kb_inputs = tf.expand_dims(kb_inputs, 0)
    kb_inputs = tf.tile(kb_inputs, tf.concat((tf.shape(inputs)[:-1], [1, 1]), 0))

    # Expand kb_mask
    kb_mask = self.kb_mask
    for i in range(inputs.shape.ndims - 2):
        kb_mask = tf.expand_dims(kb_mask, 1)
    kb_mask = tf.expand_dims(kb_mask, -1)

    # Tile inputs
    kb_size = tf.shape(self.kb_inputs)[0]
    tiling = tf.concat(([1] * (inputs.shape.ndims - 1), [kb_size], [1]), 0)
    cell_inputs = tf.tile(tf.expand_dims(inputs, -2), tiling)

    outputs = tf.concat([kb_inputs, cell_inputs], -1)
    outputs = tf.multiply(outputs, kb_mask)
    for layer in self.layers:
        outputs = layer.call(outputs)
    # outputs = tf.Print(outputs, [outputs], "KB attention pre-last layer output =")
    outputs = tf.squeeze(outputs, [-1])
    outputs = tf.concat([self.output_layer(inputs), outputs], -1)
    return outputs
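# Shape sketch for the tiling above (hedged; names follow this class):
#   kb_inputs:  [kb_size, kb_hidden] -> tiled to     [..., kb_size, kb_hidden]
#   inputs:     [..., hidden]        -> cell_inputs: [..., kb_size, hidden]
# After the concat along the last axis, every batch element is paired with
# every knowledge-base entry, and the per-entry scores (last dim squeezed to
# kb_size) are appended to the ordinary output_layer logits.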
def loss(logits, labels, lambs):
    # apply the sig function to logits and then transpose
    logits = tf.transpose(framwork.sig_func(logits))
    # according to the labels, erase rows which are not in labels
    labels_unique = tf.constant(range(NUM_CLASSES), dtype=tf.int32)
    labels_num = NUM_CLASSES
    # logits = tf.gather(logits, indices=labels_unique)
    # lambs = tf.gather(lambs, indices=labels_unique)
    # set the value of each row to True when it occurs in labels
    template = tf.tile(tf.expand_dims(labels_unique, dim=1), [1, BATCH_SIZE])
    labels_expand = tf.tile(tf.expand_dims(labels, dim=0), [labels_num, 1])
    indict_logic = tf.equal(labels_expand, template)
    # split the tensor along rows
    logit_list = tf.split(0, labels_num, logits)
    indict_logic_list = tf.split(0, labels_num, indict_logic)
    lambda_list = tf.split(0, NUM_CLASSES, lambs)
    # loss_list = list()
    # for i in range(self.image_classes):
    #     loss_list.append(framwork.loss_func(logit_list[i], indict_logic_list[i], lambda_list[i]))
    loss_list = map(framwork.loss_func, logit_list, indict_logic_list, lambda_list)
    losses = tf.add_n(loss_list)
    tf.add_to_collection('losses', losses)
    # The total loss is defined as the cross entropy loss plus all of the weight
    # decay terms (L2 loss).
    return tf.add_n(tf.get_collection('losses'), name='total_loss')
def r_xn(self, z_t, t):
    """Computes a distribution over the future observations given current latent
    state.

    The indexing in these messages is 1 indexed and inclusive. This is
    consistent with the latex documents.

    Args:
      z_t: [batch_size, state_size] Tensor
      t: Current timestep
    """
    tf.logging.info(
        "r(x_{start}:{end} | z_{t}) ~ N(z_{t}, sigma_{t})".format(
            **{"t": t,
               "start": (self.first_future_obs_index(t) + 1) * self.steps_per_obs,
               "end": self.num_timesteps - 1}))
    batch_size = tf.shape(z_t)[0]
    # the mean for all future observations is the same.
    # this tiling results in a [batch_size, num_future_obs, state_size] Tensor
    r_mu = tf.tile(z_t[:, tf.newaxis, :], [1, self.num_future_obs(t), 1])
    # compute the variance
    r_sigma = tf.maximum(tf.nn.softplus(self.sigmas[t]), self.sigma_min)
    # the variance is the same across all state dimensions, so we only have to
    # tile sigma to be [batch_size, num_future_obs].
    r_sigma = tf.tile(r_sigma[tf.newaxis, :, tf.newaxis],
                      [batch_size, 1, self.state_size])
    return tf.contrib.distributions.Normal(loc=r_mu, scale=tf.sqrt(r_sigma))
# 1D features
f1d_seq = msa1hot[0, :, :20]
f1d_pssm = msa2pssm(msa1hot, w)

f1d = tf.concat(values=[f1d_seq, f1d_pssm], axis=1)
f1d = tf.expand_dims(f1d, axis=0)
f1d = tf.reshape(f1d, [1, ncol, 42])

# 2D features
f2d_dca = tf.cond(nrow > 1,
                  lambda: fast_dca(msa1hot, w),
                  lambda: tf.zeros([ncol, ncol, 442], tf.float32))
f2d_dca = tf.expand_dims(f2d_dca, axis=0)

f2d = tf.concat([
    tf.tile(f1d[:, :, None, :], [1, 1, ncol, 1]),
    tf.tile(f1d[:, None, :, :], [1, ncol, 1, 1]),
    f2d_dca
], axis=-1)
f2d = tf.reshape(f2d, [1, ncol, ncol, 442 + 2 * 42])

#
# 2D network
#
layers2d = [f2d]
layers2d.append(conv2d(layers2d[-1], n2d_filters, 1, padding='SAME'))
layers2d.append(tf.contrib.layers.instance_norm(layers2d[-1]))
layers2d.append(activation(layers2d[-1]))

# stack of residual blocks with dilations
dilation = 1
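# The two tf.tile calls above implement the usual "outer concatenation" that
# turns per-residue features into per-pair features. A tiny standalone sketch
# with illustrative shapes (TF 1.x), not part of the original pipeline:
import tensorflow as tf

f1d_demo = tf.reshape(tf.range(6, dtype=tf.float32), [1, 3, 2])  # [1, L=3, C=2]
rows = tf.tile(f1d_demo[:, :, None, :], [1, 1, 3, 1])  # residue i's features
cols = tf.tile(f1d_demo[:, None, :, :], [1, 3, 1, 1])  # residue j's features
pairs = tf.concat([rows, cols], axis=-1)               # [1, L, L, 2C]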
def decoding_layer(self):
    '''
    Build the decoder layer.

    Parameters:
    - target_letter_to_int: vocabulary mapping for the target data
    - decoding_embedding_size: size of the embedding vectors
    - num_layers: number of stacked RNN cells
    - rnn_size: number of hidden units in each RNN cell
    - target_sequence_length: lengths of the target sequences
    - max_target_sequence_length: maximum length of the target sequences
    - encoder_state: state vector produced by the encoder
    - decoder_input: input to the decoder
    '''
    # 1. Embedding
    decoder_embeddings = tf.Variable(tf.random_uniform([len(self.data.word_letter_to_int),
                                                        self.args.decoding_embedding_size]))
    decoder_embed_input = tf.nn.embedding_lookup(decoder_embeddings, self.decoder_input)

    # 2. Build the RNN cells for the decoder
    def get_decoder_cell(rnn_size):
        decoder_cell = tf.contrib.rnn.LSTMCell(rnn_size,
                                               initializer=tf.random_uniform_initializer(-0.1, 0.1, seed=2))
        single_cell = tf.contrib.rnn.DropoutWrapper(decoder_cell, output_keep_prob=self.drop_out)
        return single_cell

    cell = tf.contrib.rnn.MultiRNNCell([get_decoder_cell(self.args.rnn_size)
                                        for _ in range(self.args.num_layers)])

    # 3. Fully connected output layer
    output_layer = Dense(len(self.data.word_letter_to_int),
                         kernel_initializer=tf.truncated_normal_initializer(mean=0.0, stddev=0.1))

    # 4. Training decoder
    with tf.variable_scope("decode"):
        # Build the helper object
        training_helper = tf.contrib.seq2seq.TrainingHelper(inputs=decoder_embed_input,
                                                            sequence_length=self.target_sequence_length,
                                                            time_major=False)
        # Build the decoder
        training_decoder = tf.contrib.seq2seq.BasicDecoder(cell, training_helper,
                                                           self.encoder_state, output_layer)
        # tf.contrib.seq2seq.dynamic_decode runs the decoding loop and returns
        # (final_outputs, final_state, final_sequence_lengths)
        self.training_decoder_output, _, _ = tf.contrib.seq2seq.dynamic_decode(
            training_decoder, maximum_iterations=self.max_target_sequence_length)
        # tf.identity returns a new tensor identical to its input
        self.training_logits = tf.identity(self.training_decoder_output.rnn_output, 'logits')

    # 5. Predicting decoder
    # Replicate encoder infos beam_width times
    if (self.args.mode == 'test'):
        with tf.variable_scope("predict"):
            decoder_initial_state = tf.contrib.seq2seq.tile_batch(self.encoder_state,
                                                                  multiplier=self.args.beam_size)
            start_tokens = tf.tile(tf.constant([self.data.word_letter_to_int['<GO>']],
                                               dtype=tf.int32),
                                   [self.args.batch_size], name='start_tokens')

            # Define a beam-search decoder
            decoder = tf.contrib.seq2seq.BeamSearchDecoder(
                cell=cell,
                embedding=decoder_embeddings,
                start_tokens=start_tokens,
                end_token=self.data.word_letter_to_int['<EOS>'],
                initial_state=decoder_initial_state,
                beam_width=self.args.beam_size,
                output_layer=output_layer,
                length_penalty_weight=0.0)

            # Dynamic decoding
            self.predict_decoder_outputs, _, _ = tf.contrib.seq2seq.dynamic_decode(
                decoder, maximum_iterations=self.max_target_sequence_length)
            self.predicts = tf.identity(
                tf.transpose(self.predict_decoder_outputs.predicted_ids, perm=[0, 2, 1]),
                'predictions')
def __init__(self, session, learning_rate, data_size, static_data_size, lstm_size):
    self.sess = session
    self.data_size = data_size
    self.static_data_size = static_data_size
    self.gpu_inputs = tf.placeholder(tf.float32, [None, None, data_size])
    self.gpu_labels = tf.placeholder(tf.float32, [None])
    if static_data_size > 0:
        self.gpu_static = tf.placeholder(tf.float32, [None, static_data_size])
        with tf.variable_scope("lstm"):
            total_time = tf.shape(self.gpu_inputs)[1]
            lstm = tf.contrib.rnn.LSTMCell(lstm_size, num_proj=1, forget_bias=1.0)
            self.W = tf.Variable(
                (np.random.rand(data_size + static_data_size, lstm_size) - 0.5) * 0.01,
                dtype=tf.float32)
            self.b = tf.Variable(np.zeros((lstm_size)), dtype=tf.float32)
            self.stacked_lstm = tf.contrib.rnn.MultiRNNCell([lstm] * 1)
            tiled_static = tf.tile(
                tf.reshape(self.gpu_static, [-1, 1, static_data_size]),
                [1, total_time, 1])
            preLSTM = tf.tanh(
                linear_layer(
                    tf.concat([self.gpu_inputs, tiled_static], axis=2),
                    self.W, self.b, data_size + static_data_size, lstm_size))
            output, state = tf.nn.dynamic_rnn(self.stacked_lstm, preLSTM,
                                              dtype=tf.float32, time_major=False,
                                              parallel_iterations=1, swap_memory=True)
    else:
        with tf.variable_scope("lstm"):
            total_time = tf.shape(self.gpu_inputs)[1]
            lstm = tf.contrib.rnn.LSTMCell(lstm_size, num_proj=1, forget_bias=1.0)
            self.W = tf.Variable(
                (np.random.rand(data_size, lstm_size) - 0.5) * 0.01,
                dtype=tf.float32)
            self.b = tf.Variable(np.zeros((lstm_size)), dtype=tf.float32)
            self.stacked_lstm = tf.contrib.rnn.MultiRNNCell([lstm] * 1)
            preLSTM = tf.tanh(
                linear_layer(self.gpu_inputs, self.W, self.b, data_size, lstm_size))
            output, state = tf.nn.dynamic_rnn(self.stacked_lstm, preLSTM,
                                              dtype=tf.float32, time_major=False,
                                              parallel_iterations=1, swap_memory=True)
    lstm_scope = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope="lstm")
    self.y = tf.sigmoid(
        tf.reshape(tf.slice(output, [0, total_time - 1, 0], [-1, 1, -1]), [-1]))
    self.overall_cost = tf.reduce_sum(
        -tf.multiply(self.gpu_labels, tf.log(self.y))
        - tf.multiply(1 - self.gpu_labels, tf.log(1 - self.y)))
    self.training_op = tf.train.AdamOptimizer(learning_rate).minimize(
        self.overall_cost, var_list=lstm_scope)
    self.saver = tf.train.Saver(var_list=lstm_scope, keep_checkpoint_every_n_hours=1)
def space_tiling(x):
    # expand from [None, 64] to [None, 4, 4, 64]
    x = tf.expand_dims(tf.expand_dims(x, 1), 1)
    return tf.tile(x, [1, 4, 4, 1])
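# Quick shape check for space_tiling (TF 1.x; the batch size 2 is arbitrary):
import tensorflow as tf

vec = tf.zeros([2, 64])
print(space_tiling(vec).shape)  # (2, 4, 4, 64)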
def dmnrun(fulldata, queask):
    # Loading saved meta graph
    sess = tf.Session()
    saver = tf.train.import_meta_graph("C:/Users/Mark/PycharmProjects/DMNTrain/weights/model.meta")
    saver.restore(sess, tf.train.latest_checkpoint('C:/Users/Mark/PycharmProjects/DMNTrain/weights'))
    tf.reset_default_graph()

    def wideArray(x, weight):
        wide = np.zeros([len(x), weight])
        for i in range(0, len(x)):
            for j in range(0, len(x[i])):
                wide[i][j] = x[i][j]
        return wide

    def octalConv(x):
        ans = []
        rows = []
        words = []
        for line in x.split(' '):
            for word in line:
                number = ord(word)
                convNum = oct(number)
                convNum = int(convNum[2:])
                # NOTE: this append is missing in the original snippet; without
                # it the conversion above is dead code and every row is empty.
                ans.append(convNum)
            rows.append(ans)
            ans = []
            words.append(line)
        ans = wideArray(rows, 50)
        return ans, words

    def contextualize(data, quest):
        """
        Read in the input and question and build a context sets.

        Output is a list of data points, each of which is a 7-element tuple containing:
            The sentences in the context in vectorized form.
            The sentences in the context as a list of string tokens.
            The question in vectorized form.
            The question as a list of string tokens.
            The answer in vectorized form.
            The answer as a list of string tokens.
            A list of numbers for supporting statements, which is currently unused.
        """
        output = []
        context = []
        for entry in data:
            # Turn input into a word vector
            # TODO: Change to Octal Decimal encoding
            context.append(octalConv(entry[:-1]))
        # Wrap up object so DMN can use it
        comp_context = tuple(zip(*context))
        output.append(comp_context +
                      octalConv(quest) +
                      octalConv('Nothing') +
                      (0,))
        return output

    test_data = contextualize(fulldata, queask)

    final_train_data = []

    def finalize(data):
        """
        Prepares data generated by contextualize() for use in the network.
        """
        final_data = []
        for cqas in data:
            contextvs, contextws, qvs, qws, avs, aws, spt = cqas
            lspt = [spt]

            lengths = itertools.accumulate(len(cvec) for cvec in contextvs)
            context_vec = np.concatenate(contextvs)
            context_words = sum(contextws, [])

            # Location markers for the beginnings of new sentences.
            sentence_ends = np.array(list(lengths))
            final_data.append((context_vec, sentence_ends, qvs, lspt,
                               context_words, cqas, avs, aws))
        return np.array(final_data)

    final_test_data = finalize(test_data)

    tf.reset_default_graph()

    # Hyperparameters

    # The number of dimensions used to store data passed between recurrent layers in the network.
    recurrent_cell_size = 128

    # The number of dimensions in our word vectorizations.
    D = 50

    # How quickly the network learns. Too high, and we may run into numeric instability
    # or other issues.
    learning_rate = 0.005

    # Dropout probabilities. For a description of dropout and what these probabilities are,
    # see Entailment with TensorFlow.
    input_p, output_p = 0.5, 0.5

    # How many questions we train on at a time.
    batch_size = 128

    # Number of passes in episodic memory. We'll get to this later.
    passes = 4

    # Feed Forward layer sizes: the number of dimensions used to store data passed from feed-forward layers.
    ff_hidden_size = 256

    # The strength of our regularization. Increase to encourage sparsity in episodic memory,
    # but makes training slower. Don't make this larger than learning_rate.
    weight_decay = 0.00000001

    # How many questions the network trains on each time it is trained.
    # Some questions are counted multiple times.
    training_iterations_count = 400000

    # How many iterations of training occur before each validation check.
    display_step = 1

    # Input Module

    # Context: A [batch_size, maximum_context_length, word_vectorization_dimensions] tensor
    # that contains all the context information.
    context = tf.placeholder(tf.float32, [None, None, D], "context")
    context_placeholder = context  # I use context as a variable name later on

    # input_sentence_endings: A [batch_size, maximum_sentence_count, 2] tensor that
    # contains the locations of the ends of sentences.
    input_sentence_endings = tf.placeholder(tf.int32, [None, None, 2], "sentence")

    # recurrent_cell_size: the number of hidden units in recurrent layers.
    input_gru = tf.contrib.rnn.GRUCell(recurrent_cell_size)

    # input_p: The probability of maintaining a specific hidden input unit.
    # Likewise, output_p is the probability of maintaining a specific hidden output unit.
    gru_drop = tf.contrib.rnn.DropoutWrapper(input_gru, input_p, output_p)

    # dynamic_rnn also returns the final internal state. We don't need that, and can
    # ignore the corresponding output (_).
    input_module_outputs, _ = tf.nn.dynamic_rnn(gru_drop, context, dtype=tf.float32,
                                                scope="input_module")

    # cs: the facts gathered from the context.
    cs = tf.gather_nd(input_module_outputs, input_sentence_endings)
    # to use every word as a fact, useful for tasks with one-sentence contexts
    s = input_module_outputs

    # Question Module

    # query: A [batch_size, maximum_question_length, word_vectorization_dimensions] tensor
    # that contains all of the questions.
    query = tf.placeholder(tf.float32, [None, None, D], "query")

    # input_query_lengths: A [batch_size, 2] tensor that contains question length information.
    # input_query_lengths[:,1] has the actual lengths; input_query_lengths[:,0] is a simple range()
    # so that it plays nice with gather_nd.
    input_query_lengths = tf.placeholder(tf.int32, [None, 2], "query_lengths")

    question_module_outputs, _ = tf.nn.dynamic_rnn(gru_drop, query, dtype=tf.float32,
                                                   scope=tf.VariableScope(True, "input_module"))

    # q: the question states. A [batch_size, recurrent_cell_size] tensor.
    q = tf.gather_nd(question_module_outputs, input_query_lengths)

    # Episodic Memory

    # make sure the current memory (i.e. the question vector) is broadcasted along the facts dimension
    size = tf.stack([tf.constant(1), tf.shape(cs)[1], tf.constant(1)])
    re_q = tf.tile(tf.reshape(q, [-1, 1, recurrent_cell_size]), size)

    # Final output for attention, needs to be 1 in order to create a mask
    output_size = 1

    # Weights and biases
    attend_init = tf.random_normal_initializer(stddev=0.1)
    w_1 = tf.get_variable("attend_w1", [1, recurrent_cell_size * 7, recurrent_cell_size],
                          tf.float32, initializer=attend_init)
    w_2 = tf.get_variable("attend_w2", [1, recurrent_cell_size, output_size],
                          tf.float32, initializer=attend_init)

    b_1 = tf.get_variable("attend_b1", [1, recurrent_cell_size],
                          tf.float32, initializer=attend_init)
    b_2 = tf.get_variable("attend_b2", [1, output_size],
                          tf.float32, initializer=attend_init)

    # Regulate all the weights and biases
    tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES, tf.nn.l2_loss(w_1))
    tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES, tf.nn.l2_loss(b_1))
    tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES, tf.nn.l2_loss(w_2))
    tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES, tf.nn.l2_loss(b_2))

    def attention(c, mem, existing_facts):
        """
        Custom attention mechanism.

        c: A [batch_size, maximum_sentence_count, recurrent_cell_size] tensor
            that contains all the facts from the contexts.
        mem: A [batch_size, maximum_sentence_count, recurrent_cell_size] tensor
            that contains the current memory. It should be the same memory for
            all facts for accurate results.
        existing_facts: A [batch_size, maximum_sentence_count, 1] tensor that
            acts as a binary mask for which facts exist and which do not.
        """
        with tf.variable_scope("attending") as scope:
            # attending: The metrics by which we decide what to attend to.
            attending = tf.concat([c, mem, re_q, c * re_q, c * mem,
                                   (c - re_q) ** 2, (c - mem) ** 2], 2)

            # m1: First layer of multiplied weights for the feed-forward network.
            # We tile the weights in order to manually broadcast, since tf.matmul does not
            # automatically broadcast batch matrix multiplication as of TensorFlow 1.2.
            m1 = tf.matmul(attending * existing_facts,
                           tf.tile(w_1, tf.stack([tf.shape(attending)[0], 1, 1]))) * existing_facts
            # bias_1: A masked version of the first feed-forward layer's bias
            # over only existing facts.
            bias_1 = b_1 * existing_facts
            # tnhan: First nonlinearity. In the original paper, this is a tanh nonlinearity;
            # choosing relu was a design choice intended to avoid issues with
            # low gradient magnitude when the tanh returned values close to 1 or -1.
            tnhan = tf.nn.relu(m1 + bias_1)

            # m2: Second layer of multiplied weights for the feed-forward network.
            # Still tiling weights for the same reason described in m1's comments.
            m2 = tf.matmul(tnhan, tf.tile(w_2, tf.stack([tf.shape(attending)[0], 1, 1])))
            # bias_2: A masked version of the second feed-forward layer's bias.
            bias_2 = b_2 * existing_facts

            # norm_m2: A normalized version of the second layer of weights, which is used
            # to help make sure the softmax nonlinearity doesn't saturate.
            norm_m2 = tf.nn.l2_normalize(m2 + bias_2, -1)

            # softmaxable: A hack in order to use sparse_softmax on an otherwise dense tensor.
            # We make norm_m2 a sparse tensor, then make it dense again after the operation.
            softmax_idx = tf.where(tf.not_equal(norm_m2, 0))[:, :-1]
            softmax_gather = tf.gather_nd(norm_m2[..., 0], softmax_idx)
            softmax_shape = tf.shape(norm_m2, out_type=tf.int64)[:-1]
            softmaxable = tf.SparseTensor(softmax_idx, softmax_gather, softmax_shape)
            return tf.expand_dims(tf.sparse_tensor_to_dense(tf.sparse_softmax(softmaxable)), -1)

    # facts_0s: a [batch_size, max_facts_length, 1] tensor
    # whose values are 1 if the corresponding fact exists and 0 if not.
    facts_0s = tf.cast(tf.count_nonzero(input_sentence_endings[:, :, -1:], -1,
                                        keepdims=True), tf.float32)

    with tf.variable_scope("Episodes") as scope:
        attention_gru = tf.contrib.rnn.GRUCell(recurrent_cell_size)

        # memory: A list of all tensors that are the (current or past) memory state
        # of the attention mechanism.
        memory = [q]

        # attends: A list of all tensors that represent what the network attends to.
        attends = []
        for a in range(passes):
            # attention mask
            attend_to = attention(cs,
                                  tf.tile(tf.reshape(memory[-1], [-1, 1, recurrent_cell_size]), size),
                                  facts_0s)

            # Inverse attention mask, for what's retained in the state.
            retain = 1 - attend_to

            # GRU pass over the facts, according to the attention mask.
            while_valid_index = (lambda state, index: index < tf.shape(cs)[1])
            update_state = (lambda state, index: (attend_to[:, index, :] *
                                                  attention_gru(cs[:, index, :], state)[0] +
                                                  retain[:, index, :] * state))
            # start loop with most recent memory and at the first index
            memory.append(tuple(tf.while_loop(while_valid_index,
                                              (lambda state, index:
                                               (update_state(state, index), index + 1)),
                                              loop_vars=[memory[-1], 0]))[0])

            attends.append(attend_to)

            # Reuse variables so the GRU pass uses the same variables every pass.
            scope.reuse_variables()

    # Answer Module
    # a0: Final memory state. (Input to answer module)
    a0 = tf.concat([memory[-1], q], -1)

    # fc_init: Initializer for the final fully connected layer's weights.
    fc_init = tf.random_normal_initializer(stddev=0.1)

    with tf.variable_scope("answer"):
        # w_answer: The final fully connected layer's weights.
        w_answer = tf.get_variable("weight", [recurrent_cell_size * 2, D],
                                   tf.float32, initializer=fc_init)
        # Regulate the fully connected layer's weights
        tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES,
                             tf.nn.l2_loss(w_answer))

        # The regressed word. This isn't an actual word yet;
        # we still have to find the closest match.
        logit = tf.expand_dims(tf.matmul(a0, w_answer), 1)

        # Make a mask over which words exist.
        with tf.variable_scope("ending"):
            all_ends = tf.reshape(input_sentence_endings, [-1, 2])
            range_ends = tf.range(tf.shape(all_ends)[0])
            ends_indices = tf.stack([all_ends[:, 0], range_ends], axis=1)
            ind = tf.reduce_max(tf.scatter_nd(ends_indices, all_ends[:, 1],
                                              [tf.shape(q)[0], tf.shape(all_ends)[0]]),
                                axis=-1)
            range_ind = tf.range(tf.shape(ind)[0])
            mask_ends = tf.cast(tf.scatter_nd(tf.stack([ind, range_ind], axis=1),
                                              tf.ones_like(range_ind),
                                              [tf.reduce_max(ind) + 1, tf.shape(ind)[0]]),
                                bool)
            # A bit of a trick. With the locations of the ends of the mask (the last periods in
            # each of the contexts) as 1 and the rest as 0, we can scan with exclusive or
            # (starting from all 1). For each context in the batch, this will result in 1s
            # up until the marker (the location of that last period) and 0s afterwards.
            mask = tf.scan(tf.logical_xor, mask_ends,
                           tf.ones_like(range_ind, dtype=bool))

        # We score each possible word inversely with their Euclidean distance to the regressed word.
        # The highest score (lowest distance) will correspond to the selected word.
        logits = -tf.reduce_sum(tf.square(context * tf.transpose(
            tf.expand_dims(tf.cast(mask, tf.float32), -1), [1, 0, 2]) - logit),
            axis=-1, name='logits')

    # Training

    # gold_standard: The real answers.
    gold_standard = tf.placeholder(tf.float32, [None, 1, D], "answer")
    with tf.variable_scope('accuracy'):
        eq = tf.equal(context, gold_standard)
        corrbool = tf.reduce_all(eq, -1, name='corrbool')
        logloc = tf.reduce_max(logits, -1, keepdims=True)
        # locs: A boolean tensor that indicates where the score
        # matches the minimum score. This happens on multiple dimensions,
        # so in the off chance there's one or two indexes that match
        # we make sure it matches in all indexes.
        locs = tf.equal(logits, logloc)

        # correctsbool: A boolean tensor that indicates for which
        # words in the context the score always matches the minimum score.
        correctsbool = tf.reduce_any(tf.logical_and(locs, corrbool), -1)
        # corrects: A tensor that is simply correctsbool cast to floats.
        corrects = tf.where(correctsbool,
                            tf.ones_like(correctsbool, dtype=tf.float32),
                            tf.zeros_like(correctsbool, dtype=tf.float32))

        # corr: corrects, but for the right answer instead of our selected answer.
        corr = tf.where(corrbool,
                        tf.ones_like(corrbool, dtype=tf.float32),
                        tf.zeros_like(corrbool, dtype=tf.float32))
    with tf.variable_scope("loss"):
        # Use sigmoid cross entropy as the base loss,
        # with our distances as the relative probabilities. There are
        # multiple correct labels, for each location of the answer word within the context.
        loss = tf.nn.sigmoid_cross_entropy_with_logits(logits=tf.nn.l2_normalize(logits, -1),
                                                       labels=corr)

    # Add regularization losses, weighted by weight_decay.
total_loss = tf.reduce_mean(loss) + weight_decay * tf.add_n( tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)) # TensorFlow's default implementation of the Adam optimizer works. We can adjust more than # just the learning rate, but it's not necessary to find a very good optimum. optimizer = tf.train.AdamOptimizer(learning_rate) # Once we have an optimizer, we ask it to minimize the loss # in order to work towards the proper training. opt_op = optimizer.minimize(total_loss) # Initialize variables init = tf.global_variables_initializer() # Launch the TensorFlow session sess = tf.Session() sess.run(init) def prep_batch(batch_data, more_data=False): """ Prepare all the preprocessing that needs to be done on a batch-by-batch basis. """ context_vec, sentence_ends, questionvs, spt, context_words, cqas, answervs, _ = zip(*batch_data) ends = list(sentence_ends) maxend = max(map(len, ends)) aends = np.zeros((len(ends), maxend)) for index, i in enumerate(ends): for indexj, x in enumerate(i): aends[index, indexj] = x - 1 new_ends = np.zeros(aends.shape + (2,)) for index, x in np.ndenumerate(aends): new_ends[index + (0,)] = index[0] new_ends[index + (1,)] = x contexts = list(context_vec) max_context_length = max([len(x) for x in contexts]) contextsize = list(np.array(contexts[0]).shape) contextsize[0] = max_context_length final_contexts = np.zeros([len(contexts)] + contextsize) contexts = [np.array(x) for x in contexts] for i, context in enumerate(contexts): final_contexts[i, 0:len(context), :] = context max_query_length = max(len(x) for x in questionvs) querysize = list(np.array(questionvs[0]).shape) querysize[:1] = [len(questionvs), max_query_length] queries = np.zeros(querysize) querylengths = np.array(list(zip(range(len(questionvs)), [len(q) - 1 for q in questionvs]))) questions = [np.array(q) for q in questionvs] for i, question in enumerate(questions): queries[i, 0:len(question), :] = question data = {context_placeholder: final_contexts, input_sentence_endings: new_ends, query: queries, input_query_lengths: querylengths, gold_standard: answervs} return (data, context_words, cqas) if more_data else data # Use TQDM if installed tqdm_installed = False # Prepare validation set batch = np.random.randint(final_test_data.shape[0], size=batch_size * 10) batch_data = final_test_data[batch] validation_set, val_context_words, val_cqas = prep_batch(batch_data, True) holder = [corrbool, locs, total_loss, logits, facts_0s, w_1] + attends + [query, cs, question_module_outputs] print('Starting session') start_time = time.time() ancr = sess.run([corrbool, locs, total_loss, logits, facts_0s, w_1] + attends + [query, cs, question_module_outputs], feed_dict=validation_set) elapsed_time = time.time() - start_time print(elapsed_time) a = ancr[0] n = ancr[1] cr = ancr[2] attenders = np.array(ancr[6:-3]) faq = np.sum(ancr[4], axis=(-1, -2)) # Number of facts in each context limit = 1 # Locations of responses within contexts indices = np.argmax(n, axis=1) # Locations of actual answers within contexts indicesc = np.argmax(a, axis=1) response = "" ans = 0 inp = '' for i, e, cw, cqa in list(zip(indices, indicesc, val_context_words, val_cqas))[:limit]: ccc = " ".join(cw) print("TEXT: ", ccc) inp = ccc print("QUESTION: ", " ".join(cqa[3])) print("RESPONSE: ", cw[i], ["Correct", "Incorrect"][i != e]) ans = i print("EXPECTED: ", cw[e]) print() # For safety, return this if nothing is found sess.close() print('--') tot_index = 0 for line in fulldata: tot_index = tot_index + len(line) if tot_index >= ans: return line return response
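# Illustrative sketch (not from the original source; TF 1.x): the sparse-softmax
# trick used in attention() above, in isolation. Zero entries mark padded facts;
# routing only the nonzero scores through tf.sparse_softmax guarantees that the
# padding receives exactly zero attention weight, which a plain dense softmax
# would not.
import tensorflow as tf

scores = tf.constant([[0.5, 1.0, 0.0],   # third fact is padding
                      [2.0, 0.0, 0.0]])  # second and third facts are padding
idx = tf.where(tf.not_equal(scores, 0))
sp = tf.SparseTensor(idx, tf.gather_nd(scores, idx),
                     tf.shape(scores, out_type=tf.int64))
masked = tf.sparse_tensor_to_dense(tf.sparse_softmax(sp))
with tf.Session() as sess:
    print(sess.run(masked))  # each row sums to 1 over its non-padded entries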
def _build_graph(self, hidden_dim, env_state_size, action_space_dim, learning_rate=0.01, activation=tf.nn.elu, scope_name='policy-network', **kwargs): with tf.variable_scope(scope_name) as scope: # Size variables with tf.variable_scope('dimensions'): self.hidden_dim = hidden_dim self.env_state_dim = env_state_size self.action_space_dim = action_space_dim # model variables with tf.variable_scope('model-parameters'): self.rnn_cell = tf.contrib.rnn.BasicRNNCell( hidden_dim, activation=activation) self.initial_state = tf.get_variable( 'rnn_init_state', [1, hidden_dim], initializer=tf.contrib.layers.variance_scaling_initializer( )) self.output_weights = tf.get_variable( 'output_weights', [hidden_dim, action_space_dim], initializer=tf.contrib.layers.variance_scaling_initializer( )) self.output_bias = tf.get_variable( 'output_bias', [action_space_dim], initializer=tf.contrib.layers.variance_scaling_initializer( )) # single step self.env_state = tf.placeholder(tf.float32, [1, env_state_size], name="state") self.rnn_state = tf.placeholder(tf.float32, [1, hidden_dim]) with tf.variable_scope('single-step-rnn'): self.rnn_state_val = None self.step_rnn, _ = self.rnn_cell(self.env_state, self.rnn_state) self.action_probability = tf.nn.softmax( tf.matmul(self.rnn_state, self.output_weights) + self.output_bias) # multiple episodes self.batch_size = tf.placeholder(tf.int32, name='max-episode-len') # returns ~ [n, max(epi_len)] self.returns = tf.placeholder(tf.float32, [None, None], 'returns') # env_states ~ [n, max(epi_len), env_state_size] self.env_states = tf.placeholder(tf.float32, [None, None, env_state_size], 'states') # actions ~ [n, max(epi_len), 3], holding (episode, step, action) index triples for tf.gather_nd self.actions = tf.placeholder(tf.int32, [None, None, 3], 'actions') # tiling initial state self.initial_states = tf.tile(self.initial_state, multiples=[self.batch_size, 1]) with tf.variable_scope('multi-step-rnn'): with tf.variable_scope('rnn'): # rnn_states ~ [n, max(epi_len), hidden_dim] self.rnn_states, _ = tf.nn.dynamic_rnn( self.rnn_cell, inputs=self.env_states, initial_state=self.initial_states, dtype=tf.float32) with tf.variable_scope('action-p'): # logits, action_probabilities ~ [n, max(epi_len), action_space_dim] self.logits = tf.tensordot( self.rnn_states, self.output_weights, axes=[[2], [0]]) + self.output_bias self.action_probabilities = tf.nn.softmax(self.logits) # obs_action_probabilities ~ [n, max(epi_len)] self.obs_action_probabilities = tf.gather_nd( self.action_probabilities, self.actions) with tf.variable_scope('train'): # calculate path-wise likelihood ratios self.episodic_loss = tf.reduce_sum( -tf.log(self.obs_action_probabilities + 1e-10) * self.returns, axis=1) # average over episodes self.loss = tf.reduce_mean(self.episodic_loss) self.optimizer = tf.train.RMSPropOptimizer( learning_rate=learning_rate) self.train_op = self.optimizer.minimize( self.loss, global_step=tf.train.get_global_step()) # summary variables with tf.variable_scope('summary'): tf.summary.tensor_summary('rnn-states', self.rnn_states) tf.summary.scalar('loss', self.loss) self.summary_op = tf.summary.merge_all()
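# Toy check (illustrative, not from the original source) of the tf.gather_nd
# indexing used in _build_graph above: because `actions` stores full
# [episode, step, action] index triples, gather_nd returns one probability per
# (episode, step) pair.
import tensorflow as tf

probs = tf.reshape(tf.range(2 * 3 * 4, dtype=tf.float32) / 24., [2, 3, 4])
actions = tf.constant([[[0, 0, 1], [0, 1, 2], [0, 2, 3]],
                       [[1, 0, 0], [1, 1, 1], [1, 2, 2]]])
picked = tf.gather_nd(probs, actions)  # shape [2, 3]
with tf.Session() as sess:
    print(sess.run(picked))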
def din_fcn_attention(query, rnn_output, keys_len, scope_name, stag='null', mode='SUM', softmax_stag=1, time_major=False, return_alphas=False, for_cnn=False): if isinstance(rnn_output, tuple): # In case of Bi-RNN, concatenate the forward and the backward RNN outputs. rnn_output = tf.concat(rnn_output, 2) if len(rnn_output.get_shape().as_list()) == 2: rnn_output = tf.expand_dims(rnn_output, 1) if time_major: # (T,B,D) => (B,T,D) rnn_output = array_ops.transpose(rnn_output, [1, 0, 2]) # Trainable parameters # mask = tf.equal(mask, tf.ones_like(mask)) # query_size = query.get_shape().as_list()[-1] rnn_output_size = rnn_output.get_shape().as_list()[ -1] # D value - hidden size of the RNN layer query = tf.layers.dense(query, rnn_output_size, activation=None, name=scope_name + '_f1' + stag) query = prelu(query, scope=scope_name) queries = tf.tile(query, [1, tf.shape(rnn_output)[1]]) queries = tf.reshape(queries, tf.shape(rnn_output)) din_all = tf.concat( [queries, rnn_output, queries - rnn_output, queries * rnn_output], axis=-1) d_layer_1_all = tf.layers.dense(din_all, 80, activation=tf.nn.sigmoid, name=scope_name + 'f1_att' + stag) d_layer_2_all = tf.layers.dense(d_layer_1_all, 40, activation=tf.nn.sigmoid, name=scope_name + 'f2_att' + stag) d_layer_3_all = tf.layers.dense(d_layer_2_all, 1, activation=None, name=scope_name + 'f3_att' + stag) d_layer_3_all = tf.reshape(d_layer_3_all, [-1, 1, tf.shape(rnn_output)[1]]) scores = d_layer_3_all # Mask key_masks = tf.sequence_mask(keys_len, tf.shape(rnn_output)[1]) # [B, T] key_masks = tf.expand_dims(key_masks, 1) # [B, 1, T] paddings = tf.ones_like(scores) * (-2**32 + 1) if not for_cnn: scores = tf.where(key_masks, scores, paddings) # [B, 1, T] # Scale # scores = scores / (facts.get_shape().as_list()[-1] ** 0.5) # Activation if softmax_stag: scores = tf.nn.softmax(scores) # [B, 1, T] # Weighted sum if mode == 'SUM': output = tf.matmul(scores, rnn_output) # [B, 1, H] # output = tf.reshape(output, [-1, tf.shape(facts)[-1]]) else: scores = tf.reshape(scores, [-1, tf.shape(rnn_output)[1]]) output = rnn_output * tf.expand_dims(scores, -1) output = tf.reshape(output, tf.shape(rnn_output)) if return_alphas: return output, scores return output
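# Minimal sketch (illustrative; toy shapes) of the masking-before-softmax step
# in din_fcn_attention: positions beyond each sequence length receive a huge
# negative score, so the softmax assigns them numerically zero weight.
import tensorflow as tf

scores = tf.constant([[[0.2, 1.5, -0.3, 0.8]]])  # [B=1, 1, T=4]
key_masks = tf.sequence_mask([2], 4)             # only the first 2 steps valid
key_masks = tf.expand_dims(key_masks, 1)         # [B, 1, T]
paddings = tf.ones_like(scores) * (-2 ** 32 + 1)
masked = tf.where(key_masks, scores, paddings)
alphas = tf.nn.softmax(masked)
with tf.Session() as sess:
    print(sess.run(alphas))  # weight concentrated on the first two steps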
def _model_fn(features, labels, mode, params, model, variable_filter_fn=None): """Model definition for the RetinaNet model based on ResNet-50. Args: features: the input image tensor with shape [batch_size, height, width, 3]. The height and width are fixed and equal. labels: the input labels in a dictionary. The labels include class targets and box targets which are dense label maps. The labels are generated from the get_input_fn function in data/dataloader.py. mode: the mode of TPUEstimator including TRAIN, EVAL, and PREDICT. params: the dictionary that defines the hyperparameters of the model. The default settings are in the default_hparams function in this file. model: the RetinaNet model that outputs class logits and box regression outputs. variable_filter_fn: the filter function that takes trainable_variables and returns the variable list after applying the filter rule. Returns: tpu_spec: the TPUEstimatorSpec to run training, evaluation, or prediction. """ cls_outputs, box_outputs = model(features, min_level=params['min_level'], max_level=params['max_level'], num_classes=params['num_classes'], num_anchors=len(params['aspect_ratios'] * params['num_scales']), is_training_bn=params['is_training_bn']) levels = cls_outputs.keys() # First check if it is in PREDICT mode. if mode == tf.estimator.ModeKeys.PREDICT: predictions = { 'image': features, } for level in levels: predictions['cls_outputs_%d' % level] = cls_outputs[level] predictions['box_outputs_%d' % level] = box_outputs[level] return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions) # Load pretrained model from checkpoint. if params['resnet_checkpoint'] and mode == tf.estimator.ModeKeys.TRAIN: def scaffold_fn(): """Loads pretrained model through scaffold function.""" tf.train.init_from_checkpoint(params['resnet_checkpoint'], { '/': 'resnet50/', }) return tf.train.Scaffold() else: scaffold_fn = None # Set up training loss and learning rate. global_step = tf.train.get_global_step() learning_rate = _learning_rate_schedule(params['learning_rate'], params['lr_warmup_init'], params['lr_warmup_step'], params['lr_drop_step'], global_step) # cls_loss and box_loss are for logging. Only total_loss is optimized. total_loss, cls_loss, box_loss = _detection_loss(cls_outputs, box_outputs, labels, params) if mode == tf.estimator.ModeKeys.TRAIN: optimizer = tf.train.MomentumOptimizer(learning_rate, momentum=params['momentum']) if params['use_tpu']: optimizer = tpu_optimizer.CrossShardOptimizer(optimizer) # Batch norm requires update_ops to be added as a train_op dependency. update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) var_list = variable_filter_fn( tf.trainable_variables()) if variable_filter_fn else None with tf.control_dependencies(update_ops): train_op = optimizer.minimize(total_loss, global_step, var_list=var_list) else: train_op = None # Evaluation only works on GPU/CPU host and batch_size=1 eval_metrics = None if mode == tf.estimator.ModeKeys.EVAL: def metric_fn(**kwargs): """Evaluation metric fn. 
Performed on CPU, do not reference TPU ops.""" eval_anchors = anchors.Anchors(params['min_level'], params['max_level'], params['num_scales'], params['aspect_ratios'], params['anchor_scale'], params['image_size']) anchor_labeler = anchors.AnchorLabeler(eval_anchors, params['num_classes']) cls_loss = tf.metrics.mean(kwargs['cls_loss_repeat']) box_loss = tf.metrics.mean(kwargs['box_loss_repeat']) # Rebuild the per-level outputs passed in through kwargs. cls_outputs = {} box_outputs = {} for level in range(params['min_level'], params['max_level'] + 1): cls_outputs[level] = kwargs['cls_outputs_%d' % level] box_outputs[level] = kwargs['box_outputs_%d' % level] detections = anchor_labeler.generate_detections( cls_outputs, box_outputs, kwargs['source_ids']) eval_metric = coco_metric.EvaluationMetric(params['val_json_file']) coco_metrics = eval_metric.estimator_metric_fn( detections, kwargs['image_scales']) # Add metrics to output. output_metrics = { 'cls_loss': cls_loss, 'box_loss': box_loss, } output_metrics.update(coco_metrics) return output_metrics batch_size = params['batch_size'] cls_loss_repeat = tf.reshape( tf.tile(tf.expand_dims(cls_loss, 0), [ batch_size, ]), [batch_size, 1]) box_loss_repeat = tf.reshape( tf.tile(tf.expand_dims(box_loss, 0), [ batch_size, ]), [batch_size, 1]) metric_fn_inputs = { 'cls_loss_repeat': cls_loss_repeat, 'box_loss_repeat': box_loss_repeat, 'source_ids': labels['source_ids'], 'image_scales': labels['image_scales'], } for level in range(params['min_level'], params['max_level'] + 1): metric_fn_inputs['cls_outputs_%d' % level] = cls_outputs[level] metric_fn_inputs['box_outputs_%d' % level] = box_outputs[level] eval_metrics = (metric_fn, metric_fn_inputs) return tpu_estimator.TPUEstimatorSpec(mode=mode, loss=total_loss, train_op=train_op, eval_metrics=eval_metrics, scaffold_fn=scaffold_fn)
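# Sketch (illustrative) of the scalar-loss tiling used in the EVAL branch above:
# TPU eval metric inputs must carry a batch dimension, so a scalar loss is tiled
# to [batch_size, 1] on the way in and reduced back with tf.metrics.mean inside
# metric_fn.
import tensorflow as tf

batch_size = 8
cls_loss = tf.constant(0.37)  # stand-in for the scalar detection loss
cls_loss_repeat = tf.reshape(
    tf.tile(tf.expand_dims(cls_loss, 0), [batch_size]), [batch_size, 1])
mean_cls_loss, update_op = tf.metrics.mean(cls_loss_repeat)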
def __init__(self, config, name): assert name in ('validation', 'training', 'test') self.name = name logging.debug('{} - model - initialize'.format(self.name)) self.is_training = True if self.name == 'training' else False self.config = config if not self.is_training: self.reinitializable_iter_for_dataset = None self.batch = self._gen_batch_fn() # generate mini-batch with tf.name_scope(self.name): with tf.variable_scope('full_conv', reuse=tf.AUTO_REUSE): logits_stereo = self._nn_model_fn() logits_stereo_flattened = flatten_maybe_padded_sequences( maybe_padded_sequences=logits_stereo, lengths=tf.tile(input=self.batch['num_frames'], multiples=[2])) logits_left_flattened, logits_right_flattened = tf.split( value=logits_stereo_flattened, num_or_size_splits=2, axis=0) logits_minor_flattened = tf.minimum(logits_left_flattened, logits_right_flattened) logits_larger_flattened = tf.maximum(logits_left_flattened, logits_right_flattened) labels_bool_flattened = flatten_maybe_padded_sequences( maybe_padded_sequences=self.batch['label'], lengths=self.batch['num_frames']) negated_labels_bool_flattened = tf.logical_not(labels_bool_flattened) labels_float_flattened = tf.cast(x=labels_bool_flattened, dtype=tf.float32) #When label is True, choose the smaller logits. Otherwise, choose the larger logits logits_mono_flattened = tf.where( tf.equal(labels_bool_flattened, True), logits_minor_flattened, logits_larger_flattened) #cross-entropy #loss = tf.nn.sigmoid_cross_entropy_with_logits(labels=labels_float_flattened, logits=logits_mono_flattened) #weighted cross-entropy #A value `pos_weights > 1` decreases the false negative count, hence increasing the recall. #Conversely setting `pos_weights < 1` decreases the false positive count and increases the precision. loss = tf.nn.weighted_cross_entropy_with_logits(targets=labels_float_flattened, logits=logits_mono_flattened, pos_weight=1.1) #focal loss #loss = MiscFns.focal_loss(labels=labels_float_flattened, logits=logits_mono_flattened) loss = tf.reduce_mean(loss) if self.is_training: global_step = tf.train.get_or_create_global_step() learning_rate = tf.train.exponential_decay(self.config.learning_rate, global_step, \ self.config.batches_per_epoch * 7, 0.7, staircase=True) _update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) if _update_ops: with tf.control_dependencies(_update_ops): training_op = tf.train.AdamOptimizer(learning_rate).minimize(loss, global_step=global_step) else: training_op = tf.train.AdamOptimizer(learning_rate).minimize(loss, global_step=global_step) pred_labels_flattened = tf.greater(logits_left_flattened+logits_right_flattened, 0) negated_pred_labels_flattened = tf.logical_not(pred_labels_flattened) # individual and ensemble statistics for test and validation if not self.is_training: with tf.name_scope('individual_and_ensemble_stats'): with tf.variable_scope('{}_local_vars'.format(self.name), reuse=tf.AUTO_REUSE): individual_tps_fps_tns_fns_var = tf.get_variable( name='individual_tps_fps_tns_fns', shape=[len(self.config.file_names[self.name]), 4], dtype=tf.int32, initializer=tf.zeros_initializer, trainable=False, collections=[tf.GraphKeys.LOCAL_VARIABLES] ) acc_loss_var = tf.get_variable( name='acc_loss', shape=[], dtype=tf.float32, initializer=tf.zeros_initializer, trainable=False, collections=[tf.GraphKeys.LOCAL_VARIABLES] ) batch_counter_var = tf.get_variable( name='batch_counter', shape=[], dtype=tf.int32, initializer=tf.zeros_initializer, trainable=False, collections=[tf.GraphKeys.LOCAL_VARIABLES] ) loop_var_proto = 
collections.namedtuple( 'loop_var_proto', ['sample_idx', 'batch_size', 'preds', 'negated_preds', 'labels', 'negated_labels', 'lengths', 'me_ids']) def cond_fn(loop_var): return tf.less(loop_var.sample_idx, loop_var.batch_size) def body_fn(loop_var): start_pos = tf.reduce_sum(loop_var.lengths[:loop_var.sample_idx]) end_pos = start_pos + loop_var.lengths[loop_var.sample_idx] cur_preds = loop_var.preds negated_cur_preds = loop_var.negated_preds cur_labels = loop_var.labels negated_cur_labels = loop_var.negated_labels cur_preds, negated_cur_preds, cur_labels, negated_cur_labels = \ [value[start_pos:end_pos] for value in [cur_preds, negated_cur_preds, cur_labels, negated_cur_labels]] tps = tf.logical_and(cur_preds, cur_labels) fps = tf.logical_and(cur_preds, negated_cur_labels) tns = tf.logical_and(negated_cur_preds, negated_cur_labels) fns = tf.logical_and(negated_cur_preds, cur_labels) tps, fps, tns, fns = \ [tf.reduce_sum(tf.cast(value, tf.int32)) for value in [tps, fps, tns, fns]] me_id = loop_var.me_ids[loop_var.sample_idx] stats_var = individual_tps_fps_tns_fns_var _new_value = stats_var[me_id] + tf.convert_to_tensor([tps, fps, tns, fns]) _update_stats = tf.scatter_update( stats_var, me_id, _new_value, use_locking=True) with tf.control_dependencies([_update_stats]): sample_idx = loop_var.sample_idx + 1 loop_var = loop_var_proto( sample_idx=sample_idx, batch_size=loop_var.batch_size, preds=loop_var.preds, negated_preds=loop_var.negated_preds, labels=loop_var.labels, negated_labels=loop_var.negated_labels, lengths=loop_var.lengths, me_ids=loop_var.me_ids ) return [loop_var] sample_idx = tf.constant(0, dtype=tf.int32) cur_batch_size = tf.shape(self.batch['num_frames'])[0] loop_var = loop_var_proto( sample_idx=sample_idx, batch_size=cur_batch_size, preds=pred_labels_flattened, negated_preds=negated_pred_labels_flattened, labels=labels_bool_flattened, negated_labels=negated_labels_bool_flattened, lengths=self.batch['num_frames'], me_ids=self.batch['me_id'] ) final_sample_idx = tf.while_loop( cond=cond_fn, body=body_fn, loop_vars=[loop_var], parallel_iterations=self.config.batch_size, back_prop=False, return_same_structure=True )[0].sample_idx individual_tps_fps_tns_fns_float = tf.cast(individual_tps_fps_tns_fns_var, tf.float32) tps, fps, _, fns = tf.unstack(individual_tps_fps_tns_fns_float, axis=1) me_wise_precisions = tps / (tps + fps + 1e-7) me_wise_recalls = tps / (tps + fns + 1e-7) me_wise_f1s = 2. * me_wise_precisions * me_wise_recalls / \ (me_wise_precisions + me_wise_recalls + 1e-7) me_wise_prfs = tf.stack([me_wise_precisions, me_wise_recalls, me_wise_f1s], axis=1) assert me_wise_prfs.shape.as_list() == [len(self.config.file_names[self.name]), 3] average_me_wise_prf = tf.reduce_mean(me_wise_prfs, axis=0) assert average_me_wise_prf.shape.as_list() == [3] # ensemble stats ensemble_tps_fps_tns_fns = tf.reduce_sum(individual_tps_fps_tns_fns_var, axis=0) tps, fps, _, fns = tf.unstack(tf.cast(ensemble_tps_fps_tns_fns, tf.float32)) en_precision = tps / (tps + fps + 1e-7) en_recall = tps / (tps + fns + 1e-7) en_f1 = 2. 
* en_precision * en_recall / (en_precision + en_recall + 1e-7) batch_counter_update_op = tf.assign_add(batch_counter_var, 1) acc_loss_update_op = tf.assign_add(acc_loss_var, loss) ensemble_prf_and_loss = tf.convert_to_tensor( [en_precision, en_recall, en_f1, acc_loss_var / tf.cast(batch_counter_var, tf.float32)]) update_op_after_each_batch = tf.group( final_sample_idx, batch_counter_update_op, acc_loss_update_op, name='grouped update ops to be run after each batch'.replace(' ', '_')) stats_after_each_epoch = dict( individual_tps_fps_tns_fns=individual_tps_fps_tns_fns_var, individual_prfs=me_wise_prfs, ensemble_tps_fps_tns_fns=ensemble_tps_fps_tns_fns, ensemble_prf_and_loss=ensemble_prf_and_loss, average_prf=average_me_wise_prf ) ''' # ensemble stats for training if self.is_training: with tf.name_scope('ensemble_stats'): with tf.variable_scope('{}_local_vars'.format(self.name), reuse=tf.AUTO_REUSE): ensemble_tps_fps_tns_fns_var = tf.get_variable( name='ensemble_tps_fps_tns_fns', shape=[4], dtype=tf.int32, initializer=tf.zeros_initializer, trainable=False, collections=[tf.GraphKeys.LOCAL_VARIABLES] ) acc_loss_var = tf.get_variable( name='acc_loss', shape=[], dtype=tf.float32, initializer=tf.zeros_initializer, trainable=False, collections=[tf.GraphKeys.LOCAL_VARIABLES] ) batch_counter_var = tf.get_variable( name='batch_counter', shape=[], dtype=tf.int32, initializer=tf.zeros_initializer, trainable=False, collections=[tf.GraphKeys.LOCAL_VARIABLES] ) tps = tf.logical_and(pred_labels_flattened, labels_bool_flattened) fps = tf.logical_and(pred_labels_flattened, negated_labels_bool_flattened) tns = tf.logical_and(negated_pred_labels_flattened, negated_labels_bool_flattened) fns = tf.logical_and(negated_pred_labels_flattened, labels_bool_flattened) tps, fps, tns, fns = [tf.reduce_sum(tf.cast(value, tf.int32)) for value in [tps, fps, tns, fns]] ensemble_tps_fps_tns_fns_update_op = tf.assign_add( ensemble_tps_fps_tns_fns_var, tf.convert_to_tensor([tps, fps, tns, fns])) acc_loss_update_op = tf.assign_add(acc_loss_var, loss) batch_counter_update_op = tf.assign_add(batch_counter_var, 1) ensemble_tps_fps_tns_fns_float = tf.cast(ensemble_tps_fps_tns_fns_var, tf.float32) tps, fps, _, fns = tf.unstack(ensemble_tps_fps_tns_fns_float) ensemble_precision = tps / (tps + fps + 1e-7) ensemble_recall = tps / (tps + fns + 1e-7) ensemble_f1 = 2. 
* ensemble_precision * ensemble_recall / \ (ensemble_precision + ensemble_recall + 1e-7) ensemble_loss = acc_loss_var / tf.cast(batch_counter_var, tf.float32) ensemble_prf_and_loss = tf.convert_to_tensor( [ensemble_precision, ensemble_recall, ensemble_f1, ensemble_loss]) update_op_after_each_batch = tf.group( batch_counter_update_op, ensemble_tps_fps_tns_fns_update_op, acc_loss_update_op) stats_after_each_epoch = dict( ensemble_tps_fps_tns_fns=ensemble_tps_fps_tns_fns_var, ensemble_prf_and_loss=ensemble_prf_and_loss ) ''' # define tensorboard summaries with tf.name_scope('tensorboard_summary'): with tf.name_scope('statistics'): if not self.is_training: list_of_summaries = [] with tf.name_scope('ensemble'): p, r, f, lo = tf.unstack(stats_after_each_epoch['ensemble_prf_and_loss']) items_for_summary = dict(precision=p, recall=r, f1=f, average_loss=lo) for item_name, item_value in items_for_summary.items(): tmp = tf.summary.scalar(item_name, item_value) list_of_summaries.append(tmp) with tf.name_scope('individual'): p, r, f = tf.unstack(stats_after_each_epoch['average_prf']) items_for_summary = dict(precision=p, recall=r, f1=f) for item_name, item_value in items_for_summary.items(): tmp = tf.summary.scalar(item_name, item_value) list_of_summaries.append(tmp) statistical_summary = tf.summary.merge(list_of_summaries) ''' else: list_of_summaries = [] with tf.name_scope('ensemble'): p, r, f, lo = tf.unstack(stats_after_each_epoch['ensemble_prf_and_loss']) items_for_summary = dict(precision=p, recall=r, f1=f, average_loss=lo) for item_name, item_value in items_for_summary.items(): tmp = tf.summary.scalar(item_name, item_value) list_of_summaries.append(tmp) statistical_summary = tf.summary.merge(list_of_summaries) ''' with tf.name_scope('images'): image_summary_length = int(6 * 16000 // 512) labels_uint8 = self.batch['label'][:, :image_summary_length, :] labels_uint8 = tf.cast(labels_uint8, tf.uint8) * 255 #assert labels_uint8.dtype == tf.uint8 labels_uint8 = labels_uint8[..., None] _logits_left = tf.split(value=logits_stereo, num_or_size_splits=2, axis=0)[0] logits_prob_uint8 = tf.sigmoid(_logits_left[:, :image_summary_length, :]) logits_prob_uint8 = tf.cast(logits_prob_uint8 * 255., tf.uint8) logits_prob_uint8 = logits_prob_uint8[..., None] images = tf.concat([labels_uint8, logits_prob_uint8, tf.zeros_like(labels_uint8)], axis=-1) images = tf.transpose(images, [0, 2, 1, 3]) images.set_shape([None, 88, image_summary_length, 3]) image_summary = tf.summary.image('images', images) if self.is_training: with tf.name_scope('params'): var_summary_dict = dict() for var in tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES): var_summary_dict[var.op.name] = tf.summary.histogram(var.op.name, var) param_summary = tf.summary.merge(list(var_summary_dict.values())) if self.is_training: op_dict = dict( training_op=training_op, #tb_summary=dict(statistics=statistical_summary, image=image_summary, parameter=param_summary), #tb_summary=dict(image=image_summary, parameter=param_summary), #update_op_after_each_batch=update_op_after_each_batch, #statistics_after_each_epoch=stats_after_each_epoch ) else: op_dict = dict( tb_summary=dict(statistics=statistical_summary, image=image_summary), update_op_after_each_batch=update_op_after_each_batch, statistics_after_each_epoch=stats_after_each_epoch ) self.op_dict = op_dict
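# Minimal sketch (illustrative) of the accumulator pattern used above: local,
# non-trainable variables collect per-batch confusion counts, and
# precision/recall/F1 are recomputed from the running totals after each epoch.
import tensorflow as tf

tps_fps_tns_fns = tf.get_variable(
    'tps_fps_tns_fns_demo', shape=[4], dtype=tf.int32,
    initializer=tf.zeros_initializer, trainable=False,
    collections=[tf.GraphKeys.LOCAL_VARIABLES])
batch_counts = tf.constant([5, 1, 90, 4])  # tp, fp, tn, fn for one batch
update_op = tf.assign_add(tps_fps_tns_fns, batch_counts)
tps, fps, _, fns = tf.unstack(tf.cast(tps_fps_tns_fns, tf.float32))
precision = tps / (tps + fps + 1e-7)
recall = tps / (tps + fns + 1e-7)
f1 = 2. * precision * recall / (precision + recall + 1e-7)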
def batch_multiclass_non_max_suppression(boxes, scores, score_thresh, iou_thresh, max_size_per_class, max_total_size=0, clip_window=None, change_coordinate_frame=False, num_valid_boxes=None, masks=None, additional_fields=None, scope=None, parallel_iterations=32): """Multi-class version of non maximum suppression that operates on a batch. This op is similar to `multiclass_non_max_suppression` but operates on a batch of boxes and scores. See documentation for `multiclass_non_max_suppression` for details. Args: boxes: A [batch_size, num_anchors, q, 4] float32 tensor containing detections. If `q` is 1 then the same boxes are used for all classes; otherwise, if `q` is equal to the number of classes, class-specific boxes are used. scores: A [batch_size, num_anchors, num_classes] float32 tensor containing the scores for each of the `num_anchors` detections. score_thresh: scalar threshold for score (low scoring boxes are removed). iou_thresh: scalar threshold for IOU (new boxes that have high IOU overlap with previously selected boxes are removed). max_size_per_class: maximum number of retained boxes per class. max_total_size: maximum number of boxes retained over all classes. By default returns all boxes retained after capping boxes per class. clip_window: A float32 tensor of shape [batch_size, 4] where each entry is of the form [y_min, x_min, y_max, x_max] representing the window to clip boxes to before performing non-max suppression. This argument can also be a tensor of shape [4] in which case, the same clip window is applied to all images in the batch. If clip_window is None, all boxes are used to perform non-max suppression. change_coordinate_frame: Whether to normalize coordinates after clipping relative to clip_window (this can only be set to True if a clip_window is provided) num_valid_boxes: (optional) a Tensor of type `int32`. A 1-D tensor of shape [batch_size] representing the number of valid boxes to be considered for each image in the batch. This parameter allows for ignoring zero paddings. masks: (optional) a [batch_size, num_anchors, q, mask_height, mask_width] float32 tensor containing box masks. `q` can be either number of classes or 1 depending on whether a separate mask is predicted per class. additional_fields: (optional) If not None, a dictionary that maps keys to tensors whose dimensions are [batch_size, num_anchors, ...]. scope: tf scope name. parallel_iterations: (optional) number of batch items to process in parallel. Returns: 'nmsed_boxes': A [batch_size, max_detections, 4] float32 tensor containing the non-max suppressed boxes. 'nmsed_scores': A [batch_size, max_detections] float32 tensor containing the scores for the boxes. 'nmsed_classes': A [batch_size, max_detections] float32 tensor containing the class for boxes. 'nmsed_masks': (optional) a [batch_size, max_detections, mask_height, mask_width] float32 tensor containing masks for each selected box. This is set to None if input `masks` is None. 'nmsed_additional_fields': (optional) a dictionary of [batch_size, max_detections, ...] float32 tensors corresponding to the tensors specified in the input `additional_fields`. This is not returned if input `additional_fields` is None. 'num_detections': A [batch_size] int32 tensor indicating the number of valid detections per batch item. Only the top num_detections[i] entries in nms_boxes[i], nms_scores[i] and nms_class[i] are valid. The rest of the entries are zero paddings. 
Raises: ValueError: if `q` in boxes.shape is not 1 or not equal to number of classes as inferred from scores.shape. """ q = boxes.shape[2].value num_classes = scores.shape[2].value if q != 1 and q != num_classes: raise ValueError('third dimension of boxes must be either 1 or equal ' 'to the third dimension of scores') if change_coordinate_frame and clip_window is None: raise ValueError( 'if change_coordinate_frame is True, then a clip_window ' 'must be specified.') original_masks = masks original_additional_fields = additional_fields with tf.name_scope(scope, 'BatchMultiClassNonMaxSuppression'): boxes_shape = boxes.shape batch_size = boxes_shape[0].value num_anchors = boxes_shape[1].value if batch_size is None: batch_size = tf.shape(boxes)[0] if num_anchors is None: num_anchors = tf.shape(boxes)[1] # If num valid boxes aren't provided, create one and mark all boxes as # valid. if num_valid_boxes is None: num_valid_boxes = tf.ones([batch_size], dtype=tf.int32) * num_anchors # If masks aren't provided, create dummy masks so we can only have one copy # of _single_image_nms_fn and discard the dummy masks after map_fn. if masks is None: masks_shape = tf.stack([batch_size, num_anchors, 1, 0, 0]) masks = tf.zeros(masks_shape) if clip_window is None: clip_window = tf.stack([ tf.reduce_min(boxes[:, :, :, 0]), tf.reduce_min(boxes[:, :, :, 1]), tf.reduce_max(boxes[:, :, :, 2]), tf.reduce_max(boxes[:, :, :, 3]) ]) if clip_window.shape.ndims == 1: clip_window = tf.tile(tf.expand_dims(clip_window, 0), [batch_size, 1]) if additional_fields is None: additional_fields = {} def _single_image_nms_fn(args): """Runs NMS on a single image and returns padded output. Args: args: A list of tensors consisting of the following: per_image_boxes - A [num_anchors, q, 4] float32 tensor containing detections. If `q` is 1 then the same boxes are used for all classes; otherwise, if `q` is equal to the number of classes, class-specific boxes are used. per_image_scores - A [num_anchors, num_classes] float32 tensor containing the scores for each of the `num_anchors` detections. per_image_masks - A [num_anchors, q, mask_height, mask_width] float32 tensor containing box masks. `q` can be either number of classes or 1 depending on whether a separate mask is predicted per class. per_image_clip_window - A 1D float32 tensor of the form [ymin, xmin, ymax, xmax] representing the window to clip the boxes to. per_image_additional_fields - (optional) A variable number of float32 tensors each with size [num_anchors, ...]. per_image_num_valid_boxes - A tensor of type `int32`. A 1-D tensor of shape [batch_size] representing the number of valid boxes to be considered for each image in the batch. This parameter allows for ignoring zero paddings. Returns: 'nmsed_boxes': A [max_detections, 4] float32 tensor containing the non-max suppressed boxes. 'nmsed_scores': A [max_detections] float32 tensor containing the scores for the boxes. 'nmsed_classes': A [max_detections] float32 tensor containing the class for boxes. 'nmsed_masks': (optional) a [max_detections, mask_height, mask_width] float32 tensor containing masks for each selected box. This is set to None if input `masks` is None. 'nmsed_additional_fields': (optional) A variable number of float32 tensors each with size [max_detections, ...] corresponding to the input `per_image_additional_fields`. 'num_detections': A [batch_size] int32 tensor indicating the number of valid detections per batch item. Only the top num_detections[i] entries in nms_boxes[i], nms_scores[i] and nms_class[i] are valid. 
The rest of the entries are zero paddings. """ per_image_boxes = args[0] per_image_scores = args[1] per_image_masks = args[2] per_image_clip_window = args[3] per_image_additional_fields = { key: value for key, value in zip(additional_fields, args[4:-1]) } per_image_num_valid_boxes = args[-1] per_image_boxes = tf.reshape( tf.slice(per_image_boxes, 3 * [0], tf.stack([per_image_num_valid_boxes, -1, -1])), [-1, q, 4]) per_image_scores = tf.reshape( tf.slice(per_image_scores, [0, 0], tf.stack([per_image_num_valid_boxes, -1])), [-1, num_classes]) per_image_masks = tf.reshape( tf.slice(per_image_masks, 4 * [0], tf.stack([per_image_num_valid_boxes, -1, -1, -1])), [ -1, q, per_image_masks.shape[2].value, per_image_masks.shape[3].value ]) if per_image_additional_fields is not None: for key, tensor in per_image_additional_fields.items(): additional_field_shape = tensor.get_shape() additional_field_dim = len(additional_field_shape) per_image_additional_fields[key] = tf.reshape( tf.slice( per_image_additional_fields[key], additional_field_dim * [0], tf.stack([per_image_num_valid_boxes] + (additional_field_dim - 1) * [-1])), [-1] + [dim.value for dim in additional_field_shape[1:]]) nmsed_boxlist = multiclass_non_max_suppression( per_image_boxes, per_image_scores, score_thresh, iou_thresh, max_size_per_class, max_total_size, clip_window=per_image_clip_window, change_coordinate_frame=change_coordinate_frame, masks=per_image_masks, additional_fields=per_image_additional_fields) padded_boxlist = box_list_ops.pad_or_clip_box_list( nmsed_boxlist, max_total_size) num_detections = nmsed_boxlist.num_boxes() nmsed_boxes = padded_boxlist.get() nmsed_scores = padded_boxlist.get_field( fields.BoxListFields.scores) nmsed_classes = padded_boxlist.get_field( fields.BoxListFields.classes) nmsed_masks = padded_boxlist.get_field(fields.BoxListFields.masks) nmsed_additional_fields = [ padded_boxlist.get_field(key) for key in per_image_additional_fields ] return ([nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks] + nmsed_additional_fields + [num_detections]) num_additional_fields = 0 if additional_fields is not None: num_additional_fields = len(additional_fields) num_nmsed_outputs = 4 + num_additional_fields batch_outputs = shape_utils.static_or_dynamic_map_fn( _single_image_nms_fn, elems=([boxes, scores, masks, clip_window] + list(additional_fields.values()) + [num_valid_boxes]), dtype=(num_nmsed_outputs * [tf.float32] + [tf.int32]), parallel_iterations=parallel_iterations) batch_nmsed_boxes = batch_outputs[0] batch_nmsed_scores = batch_outputs[1] batch_nmsed_classes = batch_outputs[2] batch_nmsed_masks = batch_outputs[3] batch_nmsed_additional_fields = { key: value for key, value in zip(additional_fields, batch_outputs[4:-1]) } batch_num_detections = batch_outputs[-1] if original_masks is None: batch_nmsed_masks = None if original_additional_fields is None: batch_nmsed_additional_fields = None return (batch_nmsed_boxes, batch_nmsed_scores, batch_nmsed_classes, batch_nmsed_masks, batch_nmsed_additional_fields, batch_num_detections)
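# Small sketch (illustrative) of the zero-padding removal performed inside
# _single_image_nms_fn above: tf.slice keeps only the first
# `per_image_num_valid_boxes` anchors before NMS runs on the image.
import tensorflow as tf

per_image_boxes = tf.random.uniform([100, 1, 4])  # [num_anchors, q, 4]
num_valid = tf.constant(37)
valid_boxes = tf.slice(per_image_boxes, [0, 0, 0],
                       tf.stack([num_valid, -1, -1]))  # -> [37, 1, 4]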
with tf.variable_scope('test_model', reuse=False): if version == 1: model = resnet_v1(input_shape=input_shape, depth=depth) elif version == 2: model = resnet_v2(input_shape=input_shape, depth=depth) var_cls = model.trainable_weights saver_model = tf.train.Saver(var_cls, max_to_keep = None) #augmentation aug_1 = tf.image.pad_to_bounding_box(x_train_tf, 4, 4, height + 8, width + 8) aug_2 = tf.image.random_crop(aug_1, [batch_size, height, width, nch]) aug_3 = tf.image.random_flip_left_right(aug_2) x_train_tf_reshaped = tf.reshape(aug_3, [-1, height*width*nch]) repeated_x_train_tf = tf.tile(x_train_tf_reshaped, [1, k_macer]) repeated_x_train_tf = tf.reshape(repeated_x_train_tf, [-1, height*width*nch]) repeated_x_train_tf = tf.reshape(repeated_x_train_tf, [-1, height, width, nch]) noise = tf.random.normal(repeated_x_train_tf.shape) * sigma_macer noisy_inputs = repeated_x_train_tf + noise outputs = KerasModelWrapper(model).get_logits(noisy_inputs) outputs = tf.reshape(outputs, [-1, k_macer, nclass]) cls_test = KerasModelWrapper(model).get_logits(x_test_tf) # Classification loss on smoothed outputs_softmax = tf.reduce_mean(tf.nn.softmax(outputs, axis = 2), axis = 1) log_softmax = tf.math.log(outputs_softmax + 1E-10)
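# Sketch (illustrative; toy shapes, random stand-in logits) of the
# smoothed-softmax step above: per-copy logits are reshaped to
# [batch, k, nclass], converted to probabilities per noisy copy, then averaged
# over the k copies before taking the log.
import tensorflow as tf

k, nclass, batch = 4, 10, 8
logits = tf.random.normal([batch * k, nclass])  # stand-in for model outputs
outputs = tf.reshape(logits, [-1, k, nclass])
outputs_softmax = tf.reduce_mean(tf.nn.softmax(outputs, axis=2), axis=1)
log_softmax = tf.math.log(outputs_softmax + 1e-10)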
def __init__(self, is_training, config): self._batch_size = batch_size = config.batch_size self.num_skills = num_skills = config.num_skills self.num_steps = num_steps = config.num_steps label_size = (num_skills * 2) id_size = num_skills df_size = 11 cluster_size = (FLAGS.num_cluster + 1) reuse_flag = False output_size = (cluster_size) self.current_label = tf.placeholder(tf.int32, [batch_size, num_steps], name='current') self.next = tf.placeholder(tf.int32, [batch_size, num_steps], name='next') self.next_label = tf.placeholder(tf.int32, [batch_size, num_steps], name='next_label') self.ndf = tf.placeholder(tf.int32, [batch_size, num_steps], name='pd') self.cluster = tf.placeholder(tf.int32, [batch_size, num_steps], name='cluster') self._target_id = target_id = tf.placeholder(tf.int32, [None]) self._target_correctness = target_correctness = tf.placeholder( tf.float32, [None]) #final_hidden_size = size #one-hot encoding current_label = tf.reshape(self.current_label, [-1]) slice_cl_data = one_hot_output(current_label, label_size, batch_size, num_steps) next_label = tf.reshape(self.next_label, [-1]) slice_nl_data = one_hot_output(next_label, label_size, batch_size, num_steps) next = tf.reshape(self.next, [-1]) slice_x_data = one_hot_output(next, id_size, batch_size, num_steps) ndf = tf.reshape(self.ndf, [-1]) slice_ndf_data = one_hot_output(ndf, df_size, batch_size, num_steps) cluster = tf.reshape(self.cluster, [-1]) slice_cluster_data = one_hot_output(cluster, cluster_size, batch_size, num_steps) with tf.variable_scope('Memory'): init_memory_key = tf.get_variable( 'key', [FLAGS.memory_size, (id_size + cluster_size)], initializer=tf.truncated_normal_initializer(stddev=0.1)) init_memory_value = tf.get_variable( 'value', [FLAGS.memory_size, FLAGS.memory_value_state_dim], initializer=tf.truncated_normal_initializer(stddev=0.1)) init_memory_value = tf.tile(tf.expand_dims(init_memory_value, 0), tf.stack([batch_size, 1, 1])) memory = DSCMN(FLAGS.memory_size, id_size, FLAGS.memory_value_state_dim, init_memory_key=init_memory_key, init_memory_value=init_memory_value, name='DSCMN') input_l = [] for i in range(num_steps): if i != 0: reuse_flag = True current_label = tf.squeeze(slice_cl_data[i], 1) next_label = tf.squeeze(slice_nl_data[i], 1) next_id = tf.squeeze(slice_x_data[i], 1) df = tf.squeeze(slice_ndf_data[i], 1) cu = tf.squeeze(slice_cluster_data[i], 1) m = tf.concat([next_id, cu], 1) correlation_weight = memory.attention(m) read_content = memory.read(correlation_weight) m1 = tf.concat([current_label, read_content, df], 1) input_l.append(m1) update = tf.concat([next_label], 1) new_memory_value = memory.write(correlation_weight, update, reuse=reuse_flag) input_ = tf.stack(input_l) input_size = int(input_[0].get_shape()[1]) x_input = tf.reshape(input_, [-1, input_size]) x_input = tf.split(x_input, num_steps, 0) final_hidden_size = input_size hidden_layers = [] for i in range(FLAGS.hidden_layer_num): final_hidden_size = final_hidden_size hidden1 = tf.nn.rnn_cell.LSTMCell(final_hidden_size, state_is_tuple=True) if is_training and config.keep_prob < 1: hidden1 = tf.nn.rnn_cell.DropoutWrapper( hidden1, output_keep_prob=FLAGS.keep_prob) hidden_layers.append(hidden1) cell = tf.nn.rnn_cell.MultiRNNCell(hidden_layers, state_is_tuple=True) outputs, state = rnn.static_rnn(cell, x_input, dtype=tf.float32) output = tf.reshape(tf.concat(outputs, 1), [-1, int(final_hidden_size)]) sigmoid_w = tf.get_variable("sigmoid_w", [final_hidden_size, output_size]) sigmoid_b = tf.get_variable("sigmoid_b", [output_size]) 
logits = tf.matmul(output, sigmoid_w) + sigmoid_b logits = tf.reshape(logits, [-1]) selected_logits = tf.gather(logits, self.target_id) self._all_logits = logits #make prediction self._pred = tf.sigmoid(selected_logits) # loss function loss = tf.reduce_sum( tf.nn.sigmoid_cross_entropy_with_logits(logits=selected_logits, labels=target_correctness)) self._cost = cost = loss
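# Minimal sketch (illustrative) of the target selection just above: the logits
# are flattened to a single vector and tf.gather keeps only the positions that
# actually carry a correctness label.
import tensorflow as tf

logits = tf.constant([[0.3, -1.2], [2.0, 0.5]])
flat_logits = tf.reshape(logits, [-1])               # [0.3, -1.2, 2.0, 0.5]
target_id = tf.constant([0, 3])
selected_logits = tf.gather(flat_logits, target_id)  # [0.3, 0.5]
pred = tf.sigmoid(selected_logits)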
def sample(self, n, max_length=None, z=None, temperature=None, start_inputs=None, beam_width=None, end_token=None): """Overrides BaseLstmDecoder `sample` method to add optional beam search. Args: n: Scalar number of samples to return. max_length: (Optional) Scalar maximum sample length to return. Required if data representation does not include end tokens. z: (Optional) Latent vectors to sample from. Required if model is conditional. Sized `[n, z_size]`. temperature: (Optional) The softmax temperature to use when not doing beam search. Defaults to 1.0. Ignored when `beam_width` is provided. start_inputs: (Optional) Initial inputs to use for batch. Sized `[n, output_depth]`. beam_width: (Optional) Width of beam to use for beam search. Beam search is disabled if not provided. end_token: (Optional) Scalar token signaling the end of the sequence to use for early stopping. Returns: samples: Sampled sequences. Sized `[n, max_length, output_depth]`. Raises: ValueError: If `z` is provided and its first dimension does not equal `n`. """ if beam_width is None: end_fn = (None if end_token is None else lambda x: tf.equal(tf.argmax(x, axis=-1), end_token)) return super(CategoricalLstmDecoder, self).sample( n, max_length, z, temperature, start_inputs, end_fn) # If `end_token` is not given, use an impossible value. end_token = self._output_depth if end_token is None else end_token if z is not None and z.shape[0].value != n: raise ValueError( '`z` must have a first dimension that equals `n` when given. ' 'Got: %d vs %d' % (z.shape[0].value, n)) if temperature is not None: tf.logging.warning('`temperature` is ignored when using beam search.') # Use a dummy Z in unconditional case. z = tf.zeros((n, 0), tf.float32) if z is None else z # If not given, start with dummy `-1` token and replace with zero vectors in # `embedding_fn`. start_tokens = ( tf.argmax(start_inputs, axis=-1, output_type=tf.int32) if start_inputs is not None else -1 * tf.ones([n], dtype=tf.int32)) initial_state = initial_cell_state_from_embedding( self._dec_cell, z, n, name='decoder/z_to_initial_state') beam_initial_state = tf.contrib.seq2seq.tile_batch( initial_state, multiplier=beam_width) # Tile `z` across beams. beam_z = tf.tile(tf.expand_dims(z, 1), [1, beam_width, 1]) def embedding_fn(tokens): # If tokens are the start_tokens (negative), replace with zero vectors. next_inputs = tf.cond( tf.less(tokens[0, 0], 0), lambda: tf.zeros([n, beam_width, self._output_depth]), lambda: tf.one_hot(tokens, self._output_depth)) # Concatenate `z` to next inputs. next_inputs = tf.concat([next_inputs, beam_z], axis=-1) return next_inputs decoder = tf.contrib.seq2seq.BeamSearchDecoder( self._dec_cell, embedding_fn, start_tokens, end_token, beam_initial_state, beam_width, output_layer=self._output_layer, length_penalty_weight=0.0) final_output, _, _ = tf.contrib.seq2seq.dynamic_decode( decoder, maximum_iterations=max_length, swap_memory=True, scope='decoder') return tf.one_hot( final_output.predicted_ids[:, :, 0], self._output_depth)
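# Quick look (illustrative, TF 1.x contrib API) at
# tf.contrib.seq2seq.tile_batch, used above to replicate the initial decoder
# state across beams: each batch entry is repeated `multiplier` times
# consecutively along axis 0, giving [batch_size * beam_width, ...].
import tensorflow as tf

state = tf.constant([[1.0, 2.0], [3.0, 4.0]])               # [batch=2, units]
tiled = tf.contrib.seq2seq.tile_batch(state, multiplier=3)  # [6, units]
with tf.Session() as sess:
    print(sess.run(tiled))  # [[1,2],[1,2],[1,2],[3,4],[3,4],[3,4]]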
def _repeat(x, n_repeats): with tf.variable_scope('_repeat'): rep = tf.tile(tf.expand_dims(x, 1), [1, n_repeats]) return tf.reshape(rep, [-1])
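# Quick check (illustrative) of the tile-then-flatten idiom in _repeat:
# repeating [1, 2] three times element-wise gives [1, 1, 1, 2, 2, 2].
import tensorflow as tf

x = tf.constant([1, 2])
rep = tf.tile(tf.expand_dims(x, 1), [1, 3])  # [[1, 1, 1], [2, 2, 2]]
flat = tf.reshape(rep, [-1])                 # [1, 1, 1, 2, 2, 2]
with tf.Session() as sess:
    print(sess.run(flat))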
# tf.random.categorical( # tf.ones((1, 36), dtype=tf.float32), # 32)[0])) # # h_xz = tf.reduce_sum(tf.square(z)) # ts_xz = tf.reduce_sum(log_det) # loss_xz = tf.reduce_sum(tf.square(z)) - tf.reduce_sum(log_det) # z = tf.random.normal((256, 6, 3), stddev=1) x_, log_det = graph_flow.f_zx( z, atoms, adjacency_map, tf.tile(walk, [256, 1])) bond_energy, angle_energy, one_four_energy, nonbonded_energy = gin.deterministic.mm.alkane_energy.alkane_energy( atoms, adjacency_map, x_) h_zx = tf.reduce_sum(bond_energy/(kB * T)) + tf.reduce_sum(angle_energy/(kB * T)) # + tf.reduce_sum(torsion_energy)# + tf.reduce_sum(one_four_energy) ts_zx = tf.reduce_sum(log_det) h_.append(h_zx.numpy()) ts_.append(ts_zx.numpy()) # # loss_zx = tf.reduce_sum(h_zx) - tf.reduce_sum(ts_zx) # # bond_energy, angle_energy, one_four_energy, nonbonded_energy = gin.deterministic.mm.alkane_energy.alkane_energy( # mol[0], mol[1], x) #
def pointnet_sa_module_msg(xyz, points, npoint, radius_list, nsample_list, mlp_list, is_training, bn_decay, scope, bn=True, use_xyz=True, use_nchw=False): ''' PointNet Set Abstraction (SA) module with Multi-Scale Grouping (MSG) Input: xyz: (batch_size, ndataset, 3) TF tensor points: (batch_size, ndataset, channel) TF tensor npoint: int32 -- #points sampled in farthest point sampling radius_list: list of float32 -- search radius in each local region, per scale nsample_list: list of int32 -- how many points in each local region, per scale mlp_list: list of list of int32 -- output sizes for the MLP on each point, per scale use_xyz: bool, if True concat XYZ with local point features, otherwise just use point features use_nchw: bool, if True, use NCHW data format for conv2d, which is usually faster than NHWC format Return: new_xyz: (batch_size, npoint, 3) TF tensor new_points: (batch_size, npoint, \sum_k{mlp_list[k][-1]}) TF tensor ''' data_format = 'NCHW' if use_nchw else 'NHWC' with tf.variable_scope(scope) as sc: new_xyz = gather_point(xyz, farthest_point_sample(npoint, xyz)) new_points_list = [] for i in range(len(radius_list)): radius = radius_list[i] nsample = nsample_list[i] idx, pts_cnt = query_ball_point(radius, nsample, xyz, new_xyz) grouped_xyz = group_point(xyz, idx) grouped_xyz -= tf.tile(tf.expand_dims(new_xyz, 2), [1, 1, nsample, 1]) if points is not None: grouped_points = group_point(points, idx) if use_xyz: grouped_points = tf.concat([grouped_points, grouped_xyz], axis=-1) else: grouped_points = grouped_xyz if use_nchw: grouped_points = tf.transpose(grouped_points, [0, 3, 1, 2]) for j, num_out_channel in enumerate(mlp_list[i]): grouped_points = tf_util.conv2d(grouped_points, num_out_channel, [1, 1], padding='VALID', stride=[1, 1], bn=bn, is_training=is_training, scope='conv%d_%d' % (i, j), bn_decay=bn_decay) if use_nchw: grouped_points = tf.transpose(grouped_points, [0, 2, 3, 1]) new_points = tf.reduce_max(grouped_points, axis=[2]) new_points_list.append(new_points) new_points_concat = tf.concat(new_points_list, axis=-1) return new_xyz, new_points_concat
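# Sketch (illustrative; toy shapes) of the local-frame step above: each grouped
# neighborhood is translated so its sampled centroid sits at the origin. Plain
# broadcasting would also work; the explicit tile mirrors the original code.
import tensorflow as tf

grouped_xyz = tf.random.normal([2, 5, 16, 3])  # (batch, npoint, nsample, 3)
new_xyz = tf.random.normal([2, 5, 3])          # (batch, npoint, 3)
centered = grouped_xyz - tf.tile(tf.expand_dims(new_xyz, 2), [1, 1, 16, 1])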
def _selective_crop_and_resize(features, boxes, box_levels, boundaries, output_size=7, sample_offset=0.5, use_einsum_gather=False): """Crop and resize boxes on a set of feature maps. Given multiple feature maps indexed by different levels, and a set of boxes where each box is mapped to a certain level, it selectively crops and resizes boxes from the corresponding feature maps to generate the box features. We follow the ROIAlign technique (see https://arxiv.org/pdf/1703.06870.pdf, figure 3 for reference). Specifically, for each feature map, we select an (output_size, output_size) set of pixels corresponding to the box location, and then use bilinear interpolation to select the feature value for each pixel. For performance, we perform the gather and interpolation on all layers as a single operation. In this op the multi-level features are first stacked and gathered into [2*output_size, 2*output_size] feature points. Then bilinear interpolation is performed on the gathered feature points to generate the [output_size, output_size] RoIAlign feature map. Here is the step-by-step algorithm: 1. The multi-level features are gathered into a [batch_size, num_boxes, output_size*2, output_size*2, num_filters] Tensor. The Tensor contains four neighboring feature points for each vertex in the output grid. 2. Compute the interpolation kernel of shape [batch_size, num_boxes, output_size*2, output_size*2]. The last 2 axes can be seen as stacking 2x2 interpolation kernels for all vertices in the output grid. 3. Element-wise multiply the gathered features and interpolation kernel. Then apply 2x2 average pooling to reduce spatial dimension to output_size. Args: features: a 5-D tensor of shape [batch_size, num_levels, max_height, max_width, num_filters] where cropping and resizing are based. boxes: a 3-D tensor of shape [batch_size, num_boxes, 4] encoding the information of each box w.r.t. the corresponding feature map. boxes[:, :, 0:2] are the grid position in (y, x) (float) of the top-left corner of each box. boxes[:, :, 2:4] are the box sizes in (h, w) (float) in terms of the number of pixels of the corresponding feature map size. box_levels: a 3-D tensor of shape [batch_size, num_boxes, 1] representing the 0-based corresponding feature level index of each box. boundaries: a 3-D tensor of shape [batch_size, num_boxes, 2] representing the boundary (in (y, x)) of the corresponding feature map for each box. Any resampled grid points that go beyond the boundary will be clipped. output_size: a scalar indicating the output crop size. sample_offset: a float number in [0, 1] indicating the subpixel sample offset from grid point. use_einsum_gather: whether to use einsum instead of gather. Replacing gather with einsum can improve performance when the feature size is not large; einsum is also friendly to model partitioning. Gather's performance is better when the feature size is very large and there are multiple box levels. Returns: features_per_box: a 5-D tensor of shape [batch_size, num_boxes, output_size, output_size, num_filters] representing the cropped features. 
""" (batch_size, num_levels, max_feature_height, max_feature_width, num_filters) = features.get_shape().as_list() if batch_size is None: batch_size = tf.shape(features)[0] _, num_boxes, _ = boxes.get_shape().as_list() kernel_y, kernel_x, box_gridy0y1, box_gridx0x1 = _compute_grid_positions( boxes, boundaries, output_size, sample_offset) x_indices = tf.cast(tf.reshape(box_gridx0x1, [batch_size, num_boxes, output_size * 2]), dtype=tf.int32) y_indices = tf.cast(tf.reshape(box_gridy0y1, [batch_size, num_boxes, output_size * 2]), dtype=tf.int32) if use_einsum_gather: # Blinear interpolation is done during the last two gathers: # f(y, x) = [hy, ly] * [[f00, f01], * [hx, lx]^T # [f10, f11]] # [[f00, f01], # [f10, f11]] = tf.einsum(tf.einsum(features, y_one_hot), x_one_hot) # where [hy, ly] and [hx, lx] are the bilinear interpolation kernel. y_indices = tf.cast(tf.reshape( box_gridy0y1, [batch_size, num_boxes, output_size, 2]), dtype=tf.int32) x_indices = tf.cast(tf.reshape( box_gridx0x1, [batch_size, num_boxes, output_size, 2]), dtype=tf.int32) # shape is [batch_size, num_boxes, output_size, 2, height] grid_y_one_hot = tf.one_hot(tf.cast(y_indices, tf.int32), max_feature_height, dtype=kernel_y.dtype) # shape is [batch_size, num_boxes, output_size, 2, width] grid_x_one_hot = tf.one_hot(tf.cast(x_indices, tf.int32), max_feature_width, dtype=kernel_x.dtype) # shape is [batch_size, num_boxes, output_size, height] grid_y_weight = tf.reduce_sum(tf.multiply(grid_y_one_hot, kernel_y), axis=-2) # shape is [batch_size, num_boxes, output_size, width] grid_x_weight = tf.reduce_sum(tf.multiply(grid_x_one_hot, kernel_x), axis=-2) # Gather for y_axis. # shape is [batch_size, num_boxes, output_size, width, features] features_per_box = tf.einsum('bmhwf,bmoh->bmowf', features, tf.cast(grid_y_weight, features.dtype)) # Gather for x_axis. # shape is [batch_size, num_boxes, output_size, output_size, features] features_per_box = tf.einsum('bmhwf,bmow->bmhof', features_per_box, tf.cast(grid_x_weight, features.dtype)) else: height_dim_offset = max_feature_width level_dim_offset = max_feature_height * height_dim_offset batch_dim_offset = num_levels * level_dim_offset batch_size_offset = tf.tile( tf.reshape( tf.range(batch_size) * batch_dim_offset, [batch_size, 1, 1, 1]), [1, num_boxes, output_size * 2, output_size * 2]) box_levels_offset = tf.tile( tf.reshape(box_levels * level_dim_offset, [batch_size, num_boxes, 1, 1]), [1, 1, output_size * 2, output_size * 2]) y_indices_offset = tf.tile( tf.reshape(y_indices * height_dim_offset, [batch_size, num_boxes, output_size * 2, 1]), [1, 1, 1, output_size * 2]) x_indices_offset = tf.tile( tf.reshape(x_indices, [batch_size, num_boxes, 1, output_size * 2]), [1, 1, output_size * 2, 1]) indices = tf.reshape( batch_size_offset + box_levels_offset + y_indices_offset + x_indices_offset, [-1]) features = tf.reshape(features, [-1, num_filters]) # TODO(wangtao): replace tf.gather with tf.gather_nd and try to get similar # performance. features_per_box = tf.reshape(tf.gather(features, indices), [ batch_size, num_boxes, output_size * 2, output_size * 2, num_filters ]) features_per_box = _feature_bilinear_interpolation( features_per_box, kernel_y, kernel_x) return features_per_box
def _build_graph(self): dim_u = self.config['ds'].dim_u dim_x = self.config['dim_x'] dim_y = self.config['ds'].dim_y ind_pnt_num = self.config['ind_pnt_num'] samples = self.config['samples'] loss_factors = self.config['loss_factors'] with self.graph.as_default(): # Variables self.zeta_pos = tf.Variable( np.random.uniform(low=-self.config['zeta_pos'], high=self.config['zeta_pos'], size=(ind_pnt_num, dim_x + dim_u))) self.zeta_mean = tf.Variable(self.config['zeta_mean'] * np.random.rand(ind_pnt_num, dim_x)) zeta_var_unc = tf.Variable( backward(self.config['zeta_var'] * np.ones( (ind_pnt_num, dim_x)))) self.zeta_var = forward(zeta_var_unc) var_x_unc = tf.Variable(backward(self.config['var_x'])) self.var_x = forward(var_x_unc) var_y_unc = tf.Variable(backward(self.config['var_y'])) self.var_y = forward(var_y_unc) self.kern = RBF(self.config['gp_var'], self.config['gp_len']) self.var_dict = { 'process noise': self.var_x, 'observation noise': self.var_y, 'kernel lengthscales': self.kern.lengthscales, 'kernel variance': self.kern.variance, 'IP pos': self.zeta_pos, 'IP mean': self.zeta_mean, 'IP var': self.zeta_var } # Loop init x_array = tf.TensorArray(dtype=tf.float64, size=self.seq_len_tf, clear_after_read=False) x_0 = self._recog_model(self.sample_in, self.sample_out) x_array = x_array.write(0, x_0) u_array = tf.TensorArray(dtype=tf.float64, size=self.seq_len_tf, clear_after_read=False) u_dub = tf.transpose(self.sample_in, perm=[1, 0, 2]) u_dub = tf.tile(tf.expand_dims(u_dub, axis=2), [1, 1, samples, 1]) u_array = u_array.unstack(u_dub) # Loop u_final, x_final, t_final = tf.while_loop( lambda u, x, t: t < self.seq_len_tf - 1, self._loop_body, [u_array, x_array, 0], parallel_iterations=1) x_final = tf.transpose(x_final.stack(), perm=[1, 0, 2, 3]) self.y_final = x_final[:, :, :, :dim_y] # Likelihood var_y_exp = tf.expand_dims( tf.expand_dims(tf.expand_dims(self.var_y, 0), 0), 0) var_full = tf.tile(var_y_exp, [self.batch_tf, self.seq_len_tf, samples, 1]) y_dist = tf.contrib.distributions.MultivariateNormalDiag( loc=self.y_final, scale_diag=tf.sqrt(var_full)) obs = tf.tile(tf.expand_dims(self.sample_out, 2), [1, 1, samples, 1]) log_probs = y_dist.log_prob(obs) loglik = tf.reduce_sum(log_probs) # KL-Regularizer k_prior = self.kern.K(self.zeta_pos, self.zeta_pos) scale_prior = tf.tile(tf.expand_dims(tf.cholesky(k_prior), 0), [dim_x, 1, 1]) zeta_prior = tf.contrib.distributions.MultivariateNormalTriL( loc=tf.zeros((dim_x, ind_pnt_num), dtype=tf.float64), scale_tril=scale_prior) zeta_dist = tf.contrib.distributions.MultivariateNormalDiag( loc=tf.transpose(self.zeta_mean), scale_diag=tf.sqrt(tf.transpose(self.zeta_var))) kl_reg = tf.reduce_sum( tf.contrib.distributions.kl_divergence(zeta_dist, zeta_prior)) # Statistics self.pred_mean, self.pred_var = tf.nn.moments(self.y_final, axes=[2]) self.pred_var = tf.add(self.pred_var, self.var_y) self.internal_mean, self.internal_var = tf.nn.moments(x_final, axes=[2]) self.mse = tf.losses.mean_squared_error(labels=self.sample_out, predictions=self.pred_mean) self.sde = tf.abs(self.pred_mean - self.sample_out) / tf.sqrt( self.pred_var) # Training elbo = loglik * loss_factors[0] - kl_reg self.loss = tf.negative(elbo) optimizer = tf.train.AdamOptimizer( learning_rate=self.config['learning_rate']) self.train = optimizer.minimize(self.loss) self.saver = tf.train.Saver() self.init = tf.global_variables_initializer()
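# Minimal sketch (illustrative; a single 2-D distribution instead of the
# per-dimension batch used above) of the KL regularizer pattern, with the same
# tf.contrib.distributions API (TF 1.x): analytic KL between a diagonal
# Gaussian posterior and a correlated Gaussian prior.
import tensorflow as tf

prior_cov = tf.constant([[1.0, 0.3], [0.3, 1.0]], tf.float64)
prior = tf.contrib.distributions.MultivariateNormalTriL(
    loc=tf.zeros([2], tf.float64), scale_tril=tf.cholesky(prior_cov))
posterior = tf.contrib.distributions.MultivariateNormalDiag(
    loc=tf.constant([0.1, -0.2], tf.float64),
    scale_diag=tf.constant([0.9, 1.1], tf.float64))
kl = tf.contrib.distributions.kl_divergence(posterior, prior)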
def compute_eval_loss_and_metrics(logits, # type: tf.Tensor softmax_logits, # type: tf.Tensor duplicate_mask, # type: tf.Tensor num_training_neg, # type: int match_mlperf=False, # type: bool use_tpu_spec=False # type: bool ): # type: (...) -> tf.estimator.EstimatorSpec """Model evaluation with HR and NDCG metrics. The evaluation protocol is to rank the test interacted item (the truth item) among 999 randomly chosen items that the user has not interacted with. The performance of the ranked list is judged by Hit Ratio (HR) and Normalized Discounted Cumulative Gain (NDCG). For evaluation, the ranked list is truncated at 10 for both metrics. As such, the HR intuitively measures whether the test item is present on the top-10 list, and the NDCG accounts for the position of the hit by assigning higher scores to hits at top ranks. Both metrics are calculated for each test user, and the average scores are reported. If `match_mlperf` is True, then the HR and NDCG computations are done in a slightly unusual way to match the MLPerf reference implementation. Specifically, if the evaluation negatives contain duplicate items, it will be treated as if the item only appeared once. Effectively, for duplicate items in a row, the predicted score for all but one of the items will be set to -infinity. For example, suppose we have the following inputs: logits_by_user: [[ 2, 3, 3], [ 5, 4, 4]] items_by_user: [[10, 20, 20], [30, 40, 40]] # Note: items_by_user is not explicitly present. Instead the relevant \ information is contained within `duplicate_mask` top_k: 2 Then with match_mlperf=True, the HR would be 2/2 = 1.0. With match_mlperf=False, the HR would be 1/2 = 0.5. This is because each user has predicted scores for only 2 unique items: 10 and 20 for the first user, and 30 and 40 for the second. Therefore, with match_mlperf=True, it's guaranteed the first item's score is in the top 2. With match_mlperf=False, this function would compute that the first user's first item is not in the top 2, because item 20 has a higher score, and item 20 occurs twice. Args: logits: A tensor containing the predicted logits for each user. The shape of logits is (num_users_per_batch * (1 + NUM_EVAL_NEGATIVES),). Logits for a user are grouped, and the first element of the group is the true element. softmax_logits: The same tensor, but with zeros left-appended. duplicate_mask: A vector with the same shape as logits, with a value of 1 if the item corresponding to the logit at that position has already appeared for that user. num_training_neg: The number of negatives per positive during training. match_mlperf: Use the MLPerf reference convention for computing rank. use_tpu_spec: Should a TPUEstimatorSpec be returned instead of an EstimatorSpec. Required for TPUs and if XLA is done on a GPU. Despite its name, TPUEstimatorSpecs work with GPUs. Returns: An EstimatorSpec for evaluation. """ in_top_k, ndcg, metric_weights, logits_by_user = compute_top_k_and_ndcg( logits, duplicate_mask, match_mlperf) # Examples are provided by the eval Dataset in a structured format, so eval # labels can be reconstructed on the fly. eval_labels = tf.reshape(tf.one_hot( tf.zeros(shape=(logits_by_user.shape[0],), dtype=tf.int32), logits_by_user.shape[1], dtype=tf.int32), (-1,)) eval_labels_float = tf.cast(eval_labels, tf.float32) # During evaluation, the ratio of negatives to positives is much higher # than during training. (Typically 999 to 1 vs. 
4 to 1.) By adjusting the # weights for the negative examples, we compute a loss which is consistent with # the training data. (And provides an apples-to-apples comparison.) negative_scale_factor = num_training_neg / rconst.NUM_EVAL_NEGATIVES example_weights = ( (eval_labels_float + (1 - eval_labels_float) * negative_scale_factor) * (1 + rconst.NUM_EVAL_NEGATIVES) / (1 + num_training_neg)) # Tile metric weights back to logit dimensions expanded_metric_weights = tf.reshape(tf.tile( metric_weights[:, tf.newaxis], (1, rconst.NUM_EVAL_NEGATIVES + 1)), (-1,)) # ignore padded examples example_weights *= tf.cast(expanded_metric_weights, tf.float32) cross_entropy = tf.losses.sparse_softmax_cross_entropy( logits=softmax_logits, labels=eval_labels, weights=example_weights) def metric_fn(top_k_tensor, ndcg_tensor, weight_tensor): return { rconst.HR_KEY: tf.metrics.mean(top_k_tensor, weights=weight_tensor), rconst.NDCG_KEY: tf.metrics.mean(ndcg_tensor, weights=weight_tensor), } if use_tpu_spec: return tf.contrib.tpu.TPUEstimatorSpec( mode=tf.estimator.ModeKeys.EVAL, loss=cross_entropy, eval_metrics=(metric_fn, [in_top_k, ndcg, metric_weights])) return tf.estimator.EstimatorSpec( mode=tf.estimator.ModeKeys.EVAL, loss=cross_entropy, eval_metric_ops=metric_fn(in_top_k, ndcg, metric_weights) )
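A quick sanity check of the weighting above: with `num_training_neg = 4` and 999 eval negatives, the rescaled weights should restore the 4:1 negative-to-positive mass seen during training. A standalone numeric sketch:

# Verify that the example-weight formula reproduces the training ratio.
num_training_neg = 4
NUM_EVAL_NEGATIVES = 999  # rconst.NUM_EVAL_NEGATIVES

scale = num_training_neg / NUM_EVAL_NEGATIVES
w_pos = 1.0 * (1 + NUM_EVAL_NEGATIVES) / (1 + num_training_neg)    # label = 1
w_neg = scale * (1 + NUM_EVAL_NEGATIVES) / (1 + num_training_neg)  # label = 0

# Total negative weight per user divided by positive weight equals 4:1.
assert abs(w_neg * NUM_EVAL_NEGATIVES / w_pos - num_training_neg) < 1e-9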
def crop_mask_in_target_box(masks, boxes, target_boxes, output_size, sample_offset=0, use_einsum=True): """Crop masks in target boxes. Args: masks: A tensor with a shape of [batch_size, num_masks, height, width]. boxes: a float tensor representing box coordinates that tightly enclose masks with a shape of [batch_size, num_masks, 4] in un-normalized coordinates. A box is represented by [ymin, xmin, ymax, xmax]. target_boxes: a float tensor representing target box coordinates for masks with a shape of [batch_size, num_masks, 4] in un-normalized coordinates. A box is represented by [ymin, xmin, ymax, xmax]. output_size: A scalar to indicate the output crop size. It currently only supports square outputs. sample_offset: a float number in [0, 1] indicating the subpixel sample offset from grid point. use_einsum: Use einsum to replace gather in selective_crop_and_resize. Returns: A 4-D tensor representing feature crop of shape [batch_size, num_boxes, output_size, output_size]. """ with tf.name_scope('crop_mask_in_target_box'): # Cast to float32, as the y_transform and other transform variables may # overflow in float16. masks = tf.cast(masks, tf.float32) boxes = tf.cast(boxes, tf.float32) target_boxes = tf.cast(target_boxes, tf.float32) batch_size, num_masks, height, width = masks.get_shape().as_list() if batch_size is None: batch_size = tf.shape(masks)[0] masks = tf.reshape(masks, [batch_size * num_masks, height, width, 1]) # Pad zeros on the boundary of masks. masks = tf.image.pad_to_bounding_box(masks, 2, 2, height + 4, width + 4) masks = tf.reshape(masks, [batch_size, num_masks, height + 4, width + 4, 1]) # Projects target box locations and sizes to corresponding cropped # mask coordinates. gt_y_min, gt_x_min, gt_y_max, gt_x_max = tf.split(value=boxes, num_or_size_splits=4, axis=2) bb_y_min, bb_x_min, bb_y_max, bb_x_max = tf.split(value=target_boxes, num_or_size_splits=4, axis=2) y_transform = (bb_y_min - gt_y_min) * height / (gt_y_max - gt_y_min + _EPSILON) + 2 x_transform = (bb_x_min - gt_x_min) * width / (gt_x_max - gt_x_min + _EPSILON) + 2 h_transform = (bb_y_max - bb_y_min) * height / (gt_y_max - gt_y_min + _EPSILON) w_transform = (bb_x_max - bb_x_min) * width / (gt_x_max - gt_x_min + _EPSILON) boundaries = tf.concat([ tf.ones_like(y_transform) * ((height + 4) - 1), tf.ones_like(x_transform) * ((width + 4) - 1) ], axis=-1) boundaries = tf.cast(boundaries, dtype=y_transform.dtype) # Reshape tensors to have the right shape for selective_crop_and_resize. transformed_boxes = tf.concat( [y_transform, x_transform, h_transform, w_transform], -1) levels = tf.tile(tf.reshape(tf.range(num_masks), [1, num_masks]), [batch_size, 1]) cropped_masks = _selective_crop_and_resize( masks, transformed_boxes, levels, boundaries, output_size, sample_offset=sample_offset, use_einsum_gather=use_einsum) cropped_masks = tf.squeeze(cropped_masks, axis=-1) return cropped_masks
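Worked numbers for the projection above (illustrative plain Python, `_EPSILON` omitted for clarity): a target box centered inside the source box maps to a centered window in the mask's pixel frame, with the `+ 2` accounting for the 2-pixel zero border padded around each mask.

# Map an un-normalized target box into the 28x28 mask frame of its source box.
height = width = 28                                   # mask resolution
gt = dict(ymin=100., xmin=40., ymax=156., xmax=96.)   # 56x56 source box
bb = dict(ymin=114., xmin=54., ymax=142., xmax=82.)   # centered target box

y0 = (bb['ymin'] - gt['ymin']) * height / (gt['ymax'] - gt['ymin']) + 2
x0 = (bb['xmin'] - gt['xmin']) * width / (gt['xmax'] - gt['xmin']) + 2
h = (bb['ymax'] - bb['ymin']) * height / (gt['ymax'] - gt['ymin'])
w = (bb['xmax'] - bb['xmin']) * width / (gt['xmax'] - gt['xmin'])
assert (y0, x0, h, w) == (9.0, 9.0, 14.0, 14.0)       # a centered 14x14 window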
def discriminative_loss_single(prediction, correct_label, feature_dim, label_shape, delta_v, delta_d, param_var, param_dist, param_reg): ''' Discriminative loss for a single prediction/label pair. :param prediction: inference of network :param correct_label: instance label :param feature_dim: feature dimension of prediction :param label_shape: shape of label :param delta_v: cutoff variance distance :param delta_d: cutoff cluster distance :param param_var: weight for intra cluster variance :param param_dist: weight for inter cluster distances :param param_reg: weight for regularization ''' ### Reshape so pixels are aligned along a vector correct_label = tf.reshape(correct_label, [label_shape[1] * label_shape[0]]) reshaped_pred = tf.reshape(prediction, [label_shape[1] * label_shape[0], feature_dim]) ### Count instances unique_labels, unique_id, counts = tf.unique_with_counts(correct_label) counts = tf.cast(counts, tf.float32) num_instances = tf.size(unique_labels) segmented_sum = tf.math.unsorted_segment_sum(reshaped_pred, unique_id, num_instances) mu = tf.divide(segmented_sum, tf.reshape(counts, (-1, 1))) mu_expand = tf.gather(mu, unique_id) ### Calculate l_var distance = tf.norm(tf.subtract(mu_expand, reshaped_pred), axis=1) distance = tf.subtract(distance, delta_v) distance = tf.clip_by_value(distance, 0., distance) distance = tf.square(distance) l_var = tf.math.unsorted_segment_sum(distance, unique_id, num_instances) l_var = tf.divide(l_var, counts) l_var = tf.reduce_sum(l_var) l_var = tf.divide(l_var, tf.cast(num_instances, tf.float32)) ### Calculate l_dist # Get distance for each pair of clusters like this: # mu_1 - mu_1 # mu_2 - mu_1 # mu_3 - mu_1 # mu_1 - mu_2 # mu_2 - mu_2 # mu_3 - mu_2 # mu_1 - mu_3 # mu_2 - mu_3 # mu_3 - mu_3 mu_interleaved_rep = tf.tile(mu, [num_instances, 1]) mu_band_rep = tf.tile(mu, [1, num_instances]) mu_band_rep = tf.reshape(mu_band_rep, (num_instances * num_instances, feature_dim)) mu_diff = tf.subtract(mu_band_rep, mu_interleaved_rep) # Filter out zeros from same cluster subtraction intermediate_tensor = tf.reduce_sum(tf.abs(mu_diff), axis=1) zero_vector = tf.zeros(1, dtype=tf.float32) bool_mask = tf.not_equal(intermediate_tensor, zero_vector) mu_diff_bool = tf.boolean_mask(mu_diff, bool_mask) mu_norm = tf.norm(mu_diff_bool, axis=1) mu_norm = tf.subtract(2. * delta_d, mu_norm) mu_norm = tf.clip_by_value(mu_norm, 0., mu_norm) mu_norm = tf.square(mu_norm) l_dist = tf.reduce_mean(mu_norm) ### Calculate l_reg l_reg = tf.reduce_mean(tf.norm(mu, axis=1)) param_scale = 1. l_var = param_var * l_var l_dist = param_dist * l_dist l_reg = param_reg * l_reg loss = param_scale * (l_var + l_dist + l_reg) return loss, l_var, l_dist, l_reg
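The interleaved/band tiling above is a compact way to enumerate every ordered pair of cluster means. A small NumPy illustration (not part of the loss) of why row `i*n + j` of the difference tensor holds `mu_i - mu_j`:

# Repeating mu two different ways yields every ordered pairwise difference.
import numpy as np

mu = np.array([[0., 0.], [1., 0.], [0., 2.]])  # 3 cluster means, dim 2
n, d = mu.shape

interleaved = np.tile(mu, (n, 1))              # mu_0, mu_1, mu_2, mu_0, ...
band = np.tile(mu, (1, n)).reshape(n * n, d)   # mu_0, mu_0, mu_0, mu_1, ...
diffs = band - interleaved                     # all pairwise differences

# Row k = i*n + j holds mu_i - mu_j; e.g. row 6 is mu_2 - mu_0.
assert np.allclose(diffs[2 * n + 0], mu[2] - mu[0])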
def multilevel_crop_and_resize(features, boxes, output_size=7, sample_offset=0.5): """Crop and resize on multilevel feature pyramid. Generate the (output_size, output_size) set of pixels for each input box by first assigning the box to the correct feature level, and then cropping and resizing it using the corresponding feature map of that level. Args: features: A dictionary with key as pyramid level and value as features. The features are in shape of [batch_size, height_l, width_l, num_filters]. boxes: A 3-D Tensor of shape [batch_size, num_boxes, 4]. Each row represents a box with [y1, x1, y2, x2] in un-normalized coordinates. output_size: A scalar to indicate the output crop size. sample_offset: a float number in [0, 1] indicating the subpixel sample offset from grid point. Returns: A 5-D tensor representing feature crop of shape [batch_size, num_boxes, output_size, output_size, num_filters]. """ with tf.name_scope('multilevel_crop_and_resize'): levels = list(features.keys()) min_level = int(min(levels)) max_level = int(max(levels)) features_shape = tf.shape(features[str(min_level)]) batch_size, max_feature_height, max_feature_width, num_filters = ( features_shape[0], features_shape[1], features_shape[2], features_shape[3]) num_boxes = tf.shape(boxes)[1] # Stack feature pyramid into a features_all of shape # [batch_size, levels, height, width, num_filters]. features_all = [] feature_heights = [] feature_widths = [] for level in range(min_level, max_level + 1): shape = features[str(level)].get_shape().as_list() feature_heights.append(shape[1]) feature_widths.append(shape[2]) # Concat tensor of [batch_size, height_l * width_l, num_filters] for each # level. features_all.append( tf.reshape(features[str(level)], [batch_size, -1, num_filters])) features_r2 = tf.reshape(tf.concat(features_all, 1), [-1, num_filters]) # Calculate height_l * width_l for each level. level_dim_sizes = [ feature_widths[i] * feature_heights[i] for i in range(len(feature_widths)) ] # level_dim_offsets is the cumulative sum of level_dim_sizes. level_dim_offsets = [0] for i in range(len(feature_widths) - 1): level_dim_offsets.append(level_dim_offsets[i] + level_dim_sizes[i]) batch_dim_size = level_dim_offsets[-1] + level_dim_sizes[-1] level_dim_offsets = tf.constant(level_dim_offsets, tf.int32) height_dim_sizes = tf.constant(feature_widths, tf.int32) # Assigns boxes to the right level. box_width = boxes[:, :, 3] - boxes[:, :, 1] box_height = boxes[:, :, 2] - boxes[:, :, 0] areas_sqrt = tf.sqrt( tf.cast(box_height, tf.float32) * tf.cast(box_width, tf.float32)) levels = tf.cast(tf.math.floordiv( tf.math.log(tf.math.divide_no_nan(areas_sqrt, 224.0)), tf.math.log(2.0)) + 4.0, dtype=tf.int32) # Maps levels between [min_level, max_level]. levels = tf.minimum(max_level, tf.maximum(levels, min_level)) # Projects box location and sizes to corresponding feature levels. scale_to_level = tf.cast(tf.pow(tf.constant(2.0), tf.cast(levels, tf.float32)), dtype=boxes.dtype) boxes /= tf.expand_dims(scale_to_level, axis=2) box_width /= scale_to_level box_height /= scale_to_level boxes = tf.concat([ boxes[:, :, 0:2], tf.expand_dims(box_height, -1), tf.expand_dims(box_width, -1) ], axis=-1) # Maps levels to [0, max_level-min_level]. 
levels -= min_level level_strides = tf.pow([[2.0]], tf.cast(levels, tf.float32)) boundary = tf.cast( tf.concat([ tf.expand_dims([[tf.cast(max_feature_height, tf.float32)]] / level_strides - 1, axis=-1), tf.expand_dims([[tf.cast(max_feature_width, tf.float32)]] / level_strides - 1, axis=-1), ], axis=-1), boxes.dtype) # Compute grid positions. kernel_y, kernel_x, box_gridy0y1, box_gridx0x1 = _compute_grid_positions( boxes, boundary, output_size, sample_offset) x_indices = tf.cast(tf.reshape( box_gridx0x1, [batch_size, num_boxes, output_size * 2]), dtype=tf.int32) y_indices = tf.cast(tf.reshape( box_gridy0y1, [batch_size, num_boxes, output_size * 2]), dtype=tf.int32) batch_size_offset = tf.tile( tf.reshape( tf.range(batch_size) * batch_dim_size, [batch_size, 1, 1, 1]), [1, num_boxes, output_size * 2, output_size * 2]) # Get level offset for each box. Each box belongs to one level. levels_offset = tf.tile( tf.reshape(tf.gather(level_dim_offsets, levels), [batch_size, num_boxes, 1, 1]), [1, 1, output_size * 2, output_size * 2]) y_indices_offset = tf.tile( tf.reshape( y_indices * tf.expand_dims(tf.gather(height_dim_sizes, levels), -1), [batch_size, num_boxes, output_size * 2, 1]), [1, 1, 1, output_size * 2]) x_indices_offset = tf.tile( tf.reshape(x_indices, [batch_size, num_boxes, 1, output_size * 2]), [1, 1, output_size * 2, 1]) indices = tf.reshape( batch_size_offset + levels_offset + y_indices_offset + x_indices_offset, [-1]) # TODO(wangtao): replace tf.gather with tf.gather_nd and try to get similar # performance. features_per_box = tf.reshape(tf.gather(features_r2, indices), [ batch_size, num_boxes, output_size * 2, output_size * 2, num_filters ]) # Bilinear interpolation. features_per_box = _feature_bilinear_interpolation( features_per_box, kernel_y, kernel_x) return features_per_box
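The level-assignment heuristic above routes each box to `floor(log2(sqrt(area) / 224)) + 4`, clamped to the available levels; `floordiv` of two logs is the same as flooring `log2`. A standalone check, assuming an FPN with levels 2..5:

# Route a box to a pyramid level based on its area (canonical size 224).
import math

def assign_level(box_height, box_width, min_level=2, max_level=5):
    area_sqrt = math.sqrt(box_height * box_width)
    level = math.floor(math.log2(area_sqrt / 224.0)) + 4
    return min(max_level, max(min_level, level))

assert assign_level(224, 224) == 4    # canonical box -> level 4
assert assign_level(112, 112) == 3    # half the size -> one level down
assert assign_level(1000, 1000) == 5  # very large box clamps at max_level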
def decode(self, x, conv_inputs1, features): batch_size = tf.shape(x)[0] # initialize state tensor arrays state_queues = [] for i, (conv_input, dilation) in enumerate(zip(conv_inputs1, self.dilations)): batch_idx = tf.range(batch_size) batch_idx = tf.tile(tf.expand_dims(batch_idx, 1), (1, dilation)) batch_idx = tf.reshape(batch_idx, [-1]) queue_begin_time = self.encode_len - dilation - 1 temporal_idx = tf.expand_dims(queue_begin_time, 1) + tf.expand_dims(tf.range(dilation), 0) temporal_idx = tf.reshape(temporal_idx, [-1]) idx = tf.stack([batch_idx, temporal_idx], axis=1) slices = tf.reshape(tf.gather_nd(conv_input, idx), (batch_size, dilation, shape(conv_input, 2))) layer_ta = tf.TensorArray(dtype=tf.float32, size=dilation + self.decode_series_len) layer_ta = layer_ta.unstack(tf.transpose(slices, (1, 0, 2))) state_queues.append(layer_ta) # initialize feature tensor array features_ta = tf.TensorArray(dtype=tf.float32, size=self.decode_series_len) features_ta = features_ta.unstack(tf.transpose(features, (1, 0, 2))) # initialize output tensor array emit_ta = tf.TensorArray(size=self.decode_series_len, dtype=tf.float32) # initialize other loop vars elements_finished = 0 >= self.decode_len time = tf.constant(0, dtype=tf.int32) # get initial x input current_idx = tf.stack([tf.range(tf.shape(self.encode_len)[0]), self.encode_len - 1], axis=1) initial_input = tf.gather_nd(x, current_idx) def loop_fn(time1, current_input, queues): current_features = features_ta.read(time1) current_input = tf.concat([current_input, current_features], axis=1) with tf.variable_scope('x-proj-decode', reuse=True): w_x_proj = tf.get_variable('weights') b_x_proj = tf.get_variable('biases') x_proj = tf.nn.tanh(tf.matmul(current_input, w_x_proj) + b_x_proj) skip_outputs, updated_queues = [], [] for i, (conv_input, queue, dilation) in enumerate(zip(conv_inputs1, queues, self.dilations)): state = queue.read(time1) with tf.variable_scope('dilated-conv-decode-{}'.format(i), reuse=True): w_conv = tf.get_variable('weights') b_conv = tf.get_variable('biases') dilated_conv = tf.matmul(state, w_conv[0, :, :]) + tf.matmul(x_proj, w_conv[1, :, :]) + b_conv conv_filter, conv_gate = tf.split(dilated_conv, 2, axis=1) dilated_conv = tf.nn.tanh(conv_filter) * tf.nn.sigmoid(conv_gate) with tf.variable_scope('dilated-conv-proj-decode-{}'.format(i), reuse=True): w_proj = tf.get_variable('weights') b_proj = tf.get_variable('biases') concat_outputs = tf.matmul(dilated_conv, w_proj) + b_proj skips, residuals = tf.split(concat_outputs, [self.skip_channels, self.residual_channels], axis=1) x_proj += residuals skip_outputs.append(skips) updated_queues.append(queue.write(time1 + dilation, x_proj)) skip_outputs = tf.nn.relu(tf.concat(skip_outputs, axis=1)) with tf.variable_scope('dense-decode-1', reuse=True): w_h = tf.get_variable('weights') b_h = tf.get_variable('biases') h = tf.nn.relu(tf.matmul(skip_outputs, w_h) + b_h) with tf.variable_scope('dense-decode-2', reuse=True): w_y = tf.get_variable('weights') b_y = tf.get_variable('biases') y_hat2 = tf.matmul(h, w_y) + b_y elements_finished2 = (time1 >= self.decode_len) finished = tf.reduce_all(elements_finished2) next_input = tf.cond( finished, lambda: tf.zeros([batch_size, 1], dtype=tf.float32), lambda: y_hat2 ) next_elements_finished = (time1 >= self.decode_len - 1) return next_elements_finished, next_input, updated_queues def condition(unused_time, elements_finished1, *_): return tf.logical_not(tf.reduce_all(elements_finished1)) def body(time1, 
elements_finished1, emit_ta1, *state_queues1): (next_finished, emit_output, state_queues2) = loop_fn(time1, initial_input, state_queues1) emit = tf.where(elements_finished1, tf.zeros_like(emit_output), emit_output) emit_ta2 = emit_ta1.write(time1, emit) #elements_finished2 = tf.logical_or(elements_finished1, next_finished) return [time1 + 1, next_finished, emit_ta2] + list(state_queues2) returned = tf.while_loop( cond=condition, body=body, loop_vars=[time, elements_finished, emit_ta] + state_queues ) outputs_ta = returned[2] y_hat = tf.transpose(outputs_ta.stack(), (1, 0, 2)) return y_hat
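The TensorArray reads and writes above implement the fast-generation queue trick: each dilated layer reads the hidden state produced `dilation` steps earlier and writes its new state at slot `time1 + dilation`, so each decoding step costs O(1) per layer instead of re-running the convolution over the whole history. A minimal sketch of the same idea with a plain deque (illustration only):

# Rolling buffer per layer: read the state from `dilation` steps back,
# write the freshly computed state so it becomes visible `dilation` steps
# later, exactly like queue.read(t) / queue.write(t + dilation) above.
from collections import deque

dilation = 4
queue = deque([0.0] * dilation, maxlen=dilation)  # seeded from the encoder
for t in range(8):
    past = queue[0]              # hidden state from `dilation` steps ago
    new_state = past + 1.0       # stand-in for the dilated conv computation
    queue.append(new_state)      # oldest entry falls off the left end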
def __init__(self): self.sess = tf.Session() self.state_size = env_set['state'] self.output_size = env_set['action'] self.worker_size = env_set['worker'] self.support_size = 8 self.target_update_tau = 0.995 self.gamma = 0.99 self.hidden = env_set['hidden'] self.batch_size = 64 self.pi_lr = 1e-4 self.q_lr = 1e-3 self.action_limit = 1.0 self.memory = replay_buffer(env_set['mem_size']) self.target_noise = 0.2 self.noise_clip = 0.1 self.alpha = 1e-5 self.x_ph, self.a_ph, self.tau_ph,self.x2_ph, self.r_ph, self.d_ph = \ cr.placeholders(self.state_size, self.output_size, self.support_size,self.state_size, None, None) with tf.variable_scope('main'): self.pi, self.logp_pi, self.q1, self.q2, self.q1_pi, self.q2_pi, self.v = cr.dipg_sac_mlp_actor_critic( x=self.x_ph, a=self.a_ph, tau= self.tau_ph, hidden=self.hidden, activation=tf.nn.relu, output_activation=tf.tanh, output_size=self.output_size ) with tf.variable_scope('target'): _, _, _, _, _, _, self.v_targ = cr.dipg_sac_mlp_actor_critic( x=self.x2_ph, a=self.a_ph, tau=self.tau_ph, hidden=self.hidden, activation=tf.nn.relu, output_activation=tf.tanh, output_size=self.output_size ) self.pi_params = cr.get_vars('main/pi') self.value_params = cr.get_vars('main/q') + cr.get_vars('main/v') self.min_q = tf.where(tf.less(tf.reduce_mean(self.q1_pi),tf.reduce_mean(self.q2_pi)),self.q1_pi,self.q2_pi) self.q_backup = tf.stop_gradient(tf.tile(tf.expand_dims(self.r_ph,axis=1),[1,self.support_size])\ + self.gamma*tf.tile(tf.expand_dims(1-self.d_ph,axis=1),[1,self.support_size])*self.v_targ) self.v_backup = tf.stop_gradient(self.min_q\ - self.alpha*tf.tile(tf.expand_dims(self.logp_pi,axis=1),[1,self.support_size])) self.pi_loss = tf.reduce_mean(self.alpha * self.logp_pi - tf.reduce_mean(self.q1_pi*tf.square(self.tau_ph))) tau = self.tau_ph inv_tau = 1 - tau tau = tf.tile(tf.expand_dims(tau, axis=1), [1, self.support_size, 1]) inv_tau = tf.tile(tf.expand_dims(inv_tau, axis=1), [1, self.support_size, 1]) logit_valid_tile = tf.tile(tf.expand_dims(self.q_backup, axis=1), [1, self.support_size, 1]) theta_loss_tile = tf.tile(tf.expand_dims(self.q1, axis=2), [1, 1, self.support_size]) Huber_loss = tf.losses.mean_squared_error(logit_valid_tile, theta_loss_tile, reduction=tf.losses.Reduction.NONE) error_loss = logit_valid_tile - theta_loss_tile Loss = tf.where(tf.less(error_loss, 0.0), Huber_loss, tau * Huber_loss) self.q1_loss = 0.5*tf.reduce_mean(tf.reduce_sum(tf.reduce_mean(Loss, axis=2), axis=1)) theta_loss_tile = tf.tile(tf.expand_dims(self.q2, axis=2), [1, 1, self.support_size]) Huber_loss = tf.losses.mean_squared_error(logit_valid_tile, theta_loss_tile, reduction=tf.losses.Reduction.NONE) error_loss = logit_valid_tile - theta_loss_tile Loss = tf.where(tf.less(error_loss, 0.0), Huber_loss, tau * Huber_loss) self.q2_loss = 0.5*tf.reduce_mean(tf.reduce_sum(tf.reduce_mean(Loss, axis=2), axis=1)) theta_loss_tile = tf.tile(tf.expand_dims(self.v, axis=2), [1, 1, self.support_size]) logit_valid_tile = tf.tile(tf.expand_dims(self.v_backup, axis=1), [1, self.support_size, 1]) Huber_loss = tf.losses.mean_squared_error(logit_valid_tile, theta_loss_tile, reduction=tf.losses.Reduction.NONE) error_loss = logit_valid_tile - theta_loss_tile Loss = tf.where(tf.less(error_loss, 0.0), Huber_loss, tau * Huber_loss) self.v_loss = 0.5*tf.reduce_mean(tf.reduce_sum(tf.reduce_mean(Loss, axis=2), axis=1)) self.value_loss = self.q1_loss + self.q2_loss + self.v_loss self.pi_optimizer = tf.train.AdamOptimizer(self.pi_lr) self.train_pi_op = self.pi_optimizer.minimize(self.pi_loss, 
var_list=self.pi_params) self.value_optimizer = tf.train.AdamOptimizer(self.q_lr) with tf.control_dependencies([self.train_pi_op]): self.train_value_op = self.value_optimizer.minimize(self.value_loss, var_list=self.value_params) with tf.control_dependencies([self.train_value_op]): self.target_update = tf.group([tf.assign(v_targ, self.target_update_tau * v_targ + (1 - self.target_update_tau) * v_main) for v_main, v_targ in zip(cr.get_vars('main'), cr.get_vars('target'))]) self.step_ops = [self.pi_loss, self.value_loss, self.train_pi_op, self.train_value_op, self.target_update] self.target_init = tf.group([tf.assign(v_targ, v_main) for v_main, v_targ in zip(cr.get_vars('main/v'), cr.get_vars('target/v'))]) self.sess.run(tf.global_variables_initializer()) self.sess.run(self.target_init)
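The `Huber_loss` name above is a misnomer: the term is a squared error, weighted asymmetrically by `tau` (note that `inv_tau` is computed but never applied to the negative-error branch). For reference, the classic quantile (pinball) loss that this construction approximates is sketched below; it is an illustration, not the graph the class builds, and minimizing it drives the prediction toward the tau-quantile:

# Pinball loss: overestimates are weighted by (1 - tau), underestimates by tau.
import numpy as np

def pinball_loss(target, pred, tau):
    err = target - pred
    return np.mean(np.where(err < 0.0, (1.0 - tau) * -err, tau * err))

np.random.seed(0)
targets = np.random.randn(1000)
grid = np.linspace(-3, 3, 601)
best = grid[np.argmin([pinball_loss(targets, p, 0.9) for p in grid])]
assert abs(best - np.quantile(targets, 0.9)) < 0.1  # minimizer ~ 0.9-quantile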
def parse_obs(game_state): full_map = game_state['gameMap'] rows = game_state['gameDetails']['mapHeight'] columns = game_state['gameDetails']['mapWidth'] player_buildings = getPlayerBuildings(full_map, rows, columns) opponent_buildings = getOpponentBuildings(full_map, rows, columns) projectiles = getProjectiles(full_map, rows, columns) player_info = getPlayerInfo('A', game_state) opponent_info = getPlayerInfo('B', game_state) round_num = game_state['gameDetails']['round'] # works for jar v1.1.2 prices = { "ATTACK": game_state['gameDetails']['buildingsStats']['ATTACK']['price'], "DEFENSE": game_state['gameDetails']['buildingsStats']['DEFENSE']['price'], "ENERGY": game_state['gameDetails']['buildingsStats']['ENERGY']['price'], "TESLA": game_state['gameDetails']['buildingsStats']['TESLA']['price'], } with tf.name_scope("shaping_inputs") as scope: if debug: print("Shaping inputs...") s = Stopwatch() pb = tf.one_hot(indices=player_buildings, depth=5, axis=-1, name="player_buildings") # 20x20x5 ob = tf.one_hot(indices=opponent_buildings, depth=5, axis=-1, name="opp_buildings") # 20x20x5 proj = tf.one_hot(indices=projectiles, depth=3, axis=-1, name='projectiles') # 20x40x3 k = proj.get_shape().as_list() proj = tf.reshape(proj, [int(k[0]), int(k[1] / 2), 6 ]) # 20x20x6. Only works for single missiles non_spatial = list(player_info.values())[1:] + list( opponent_info.values())[1:] + list(prices.values()) # 12x1 non_spatial = tf.cast(non_spatial, dtype=tf.float32) # broadcasting the non-spatial features to the channel dimension broadcast_stats = tf.tile( tf.expand_dims(tf.expand_dims(non_spatial, axis=0), axis=0), [int(k[0]), int(k[1] / 2), 1]) # now 20x20x12 # adding all the inputs together via the channel dimension spatial = tf.concat([pb, ob, proj, broadcast_stats], axis=-1) # 20x20x(16 + 12) if debug: print("Finished shaping inputs. Took " + s.delta + "\nShape of inputs:" + str(spatial.shape)) return spatial, rows, columns
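The broadcast above simply copies the per-game scalar statistics to every spatial position so they can ride along the channel axis next to the one-hot planes. A tiny NumPy analogue (illustration only):

# A length-C vector of scalar stats becomes an H x W x C block, ready to be
# concatenated channel-wise with the spatial one-hot feature planes.
import numpy as np

h, w = 20, 20
stats = np.array([3.0, 1.0, 0.5])            # e.g. health, energy, a price
broadcast = np.tile(stats[None, None, :], (h, w, 1))
assert broadcast.shape == (h, w, 3)
assert np.all(broadcast[7, 13] == stats)     # identical stats at every pixel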
w_1 = tf.Variable(tf.truncated_normal([785, middle], stddev=0.1)) w_2 = tf.Variable(tf.truncated_normal([middle + 1, 10], stddev=0.1)) w_old_1 = tf.Variable(tf.zeros([785, middle])) w_old_2 = tf.Variable(tf.zeros([middle + 1, 10])) #Conceptors for used spaces A_0 = np.zeros([785, 785]) A_1 = np.zeros([middle + 1, middle + 1]) #Conceptors for free spaces F_0 = tf.Variable(tf.eye(785)) F_1 = tf.Variable(tf.eye(middle + 1)) #Forward Pass, ab_i is the state vector together with bias ab_0 = tf.concat([a_0, tf.tile(tf.ones([1, 1]), [tf.shape(a_0)[0], 1])], 1) z_1 = tf.matmul(ab_0, w_1) a_1 = sigma(z_1) ab_1 = tf.concat([a_1, tf.tile(tf.ones([1, 1]), [tf.shape(a_1)[0], 1])], 1) z_2 = tf.matmul(ab_1, w_2) a_2 = sigma(z_2) diff = tf.subtract(a_2, y) #Backward Pass reg2 = tf.Variable(0.001) reg1 = tf.Variable(0.001) d_z_2 = tf.multiply(diff, sigmaprime(z_2)) d_w_2 = tf.matmul(tf.transpose(tf.matmul(ab_1, F_1)), d_z_2)
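`sigma` and `sigmaprime` are used above but not defined in this snippet. A standard choice (an assumption here, shown for completeness) is the logistic sigmoid and its derivative expressed through itself:

import tensorflow as tf

def sigma(x):
    # logistic sigmoid activation
    return tf.sigmoid(x)

def sigmaprime(x):
    # derivative of the sigmoid: sigma(x) * (1 - sigma(x))
    s = tf.sigmoid(x)
    return s * (1.0 - s)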
x_dim = 784 eps = 1e-10 n_class = args.n_class n_cv = args.n_cv z_dim = n_cv * (n_class - 1) z_concate_dim = n_cv * n_class prior_logit0 = tf.get_variable("p_b_logit", dtype=tf.float32, initializer=tf.zeros([n_cv, n_class])) x = tf.placeholder(tf.float32, [None, x_dim]) x_binary = tf.to_float(x > .5) ntimes = tf.placeholder(tf.int32) x_binary = tf.tile(x_binary, (ntimes, 1)) N = tf.shape(x_binary)[0] #encoder q(z|x) z0 = encoder(x_binary, z_dim) z = tf.reshape(z0, [N, n_cv, n_class - 1]) zeros_logits = tf.zeros(shape=[N, n_cv, 1]) z_concate = tf.concat([zeros_logits, z], axis=2) q_b = Categorical(logits=z_concate) b_sample = q_b.sample() b_sample = tf.one_hot(b_sample, depth=n_class) b_sample_out = tf.cast(b_sample, tf.float32) b_sample_in = tf.placeholder(tf.float32, [None, n_cv, n_class])
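The zeros column concatenated above is the standard trick for parameterizing a K-way categorical with K-1 free logits: softmax is invariant to adding a constant to all logits, so one logit can be pinned to zero without losing expressiveness. A quick check:

# Pinning the first logit to 0 still reaches every categorical distribution.
import numpy as np

def softmax(z):
    e = np.exp(z - z.max())
    return e / e.sum()

free = np.array([0.7, -1.2])            # K-1 = 2 free logits
full = np.concatenate([[0.0], free])    # prepend the pinned zero logit
shifted = full + 3.14                   # any constant shift of all logits
assert np.allclose(softmax(full), softmax(shifted))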
def beam_search(symbols_to_logits_fn, initial_ids, beam_size, decode_length, vocab_size, alpha, states=None, eos_id=EOS_ID, stop_early=True, return_states=False): """Beam search with length penalties. Requires a function that can take the currently decoded symbols and return the logits for the next symbol. The implementation is inspired by https://arxiv.org/abs/1609.08144. When running, the beam search steps can be visualized by using tfdbg to watch the operations generating the output ids for each beam step. These operations have the pattern: (alive|finished)_topk_(seq,scores) Operations marked `alive` represent the new beam sequences that will be processed in the next step. Operations marked `finished` represent the completed beam sequences, which may be padded with 0s if no beams finished. Operations marked `seq` store the full beam sequence for the time step. Operations marked `scores` store the sequence's final log scores. The beam search steps will be processed sequentially in order, so when observed tensors are captured from these operations, clients can make assumptions about which step is being recorded. WARNING: Assumes the 2nd dimension of tensors in `states` is not invariant; this means that the shape of the 2nd dimension of these tensors will not be available (i.e. set to None) inside symbols_to_logits_fn. Args: symbols_to_logits_fn: Interface to the model, to provide logits. Should take [batch_size, decoded_ids] and return [batch_size, vocab_size] initial_ids: Ids to start off the decoding, this will be the first thing handed to symbols_to_logits_fn (after expanding to beam size) [batch_size] beam_size: Size of the beam. decode_length: Number of steps to decode for. vocab_size: Size of the vocab, must equal the size of the logits returned by symbols_to_logits_fn alpha: alpha for length penalty. states: dict (possibly nested) of decoding states. eos_id: ID for end of sentence. stop_early: a boolean - stop once best sequence is provably determined. return_states: a boolean - return the updated states dictionary. Returns: Tuple of (decoded beams [batch_size, beam_size, decode_length] decoding probabilities [batch_size, beam_size]) and the decoding states if `return_states` is True. """ batch_size = _shape_list(initial_ids)[0] # Assume initial_ids are prob 1.0 initial_log_probs = tf.constant([[0.] + [-float("inf")] * (beam_size - 1)]) # Expand to beam_size (batch_size, beam_size) alive_log_probs = tf.tile(initial_log_probs, [batch_size, 1]) # Expand each batch and state to beam_size alive_seq = _expand_to_beam_size(initial_ids, beam_size) alive_seq = tf.expand_dims(alive_seq, axis=2) # (batch_size, beam_size, 1) if states: states = nest.map_structure( lambda state: _expand_to_beam_size(state, beam_size), states) else: states = {} # Finished will keep track of all the sequences that have finished so far # Finished log probs will be negative infinity in the beginning # finished_flags will keep track of booleans finished_seq = tf.zeros(_shape_list(alive_seq), tf.int32) # Setting the scores of the initial to negative infinity. finished_scores = tf.ones([batch_size, beam_size]) * -INF finished_flags = tf.zeros([batch_size, beam_size], tf.bool) def grow_finished(finished_seq, finished_scores, finished_flags, curr_seq, curr_scores, curr_finished): """Given sequences and scores, will gather the top k=beam size sequences. Args: finished_seq: Current finished sequences. [batch_size, beam_size, current_decoded_length] finished_scores: scores for each of these sequences. 
[batch_size, beam_size] finished_flags: finished bools for each of these sequences. [batch_size, beam_size] curr_seq: current topk sequence that has been grown by one position. [batch_size, beam_size, current_decoded_length] curr_scores: scores for each of these sequences. [batch_size, beam_size] curr_finished: Finished flags for each of these sequences. [batch_size, beam_size] Returns: Tuple of (Topk sequences based on scores, log probs of these sequences, Finished flags of these sequences) """ # First append a column of zero ids to finished_seq so it matches the # length of the newly grown curr_seq finished_seq = tf.concat( [finished_seq, tf.zeros([batch_size, beam_size, 1], tf.int32)], axis=2) # Set the scores of the unfinished seq in curr_seq to large negative # values curr_scores += (1. - tf.to_float(curr_finished)) * -INF # concatenating the sequences and scores along beam axis curr_finished_seq = tf.concat([finished_seq, curr_seq], axis=1) curr_finished_scores = tf.concat([finished_scores, curr_scores], axis=1) curr_finished_flags = tf.concat([finished_flags, curr_finished], axis=1) return compute_topk_scores_and_seq( curr_finished_seq, curr_finished_scores, curr_finished_scores, curr_finished_flags, beam_size, batch_size, "grow_finished") def grow_alive(curr_seq, curr_scores, curr_log_probs, curr_finished, states): """Given sequences and scores, will gather the top k=beam size sequences. Args: curr_seq: current topk sequence that has been grown by one position. [batch_size, beam_size, i+1] curr_scores: scores for each of these sequences. [batch_size, beam_size] curr_log_probs: log probs for each of these sequences. [batch_size, beam_size] curr_finished: Finished flags for each of these sequences. [batch_size, beam_size] states: dict (possibly nested) of decoding states. Returns: Tuple of (Topk sequences based on scores, log probs of these sequences, Finished flags of these sequences) """ # Set the scores of the finished seq in curr_seq to large negative # values curr_scores += tf.to_float(curr_finished) * -INF return compute_topk_scores_and_seq(curr_seq, curr_scores, curr_log_probs, curr_finished, beam_size, batch_size, "grow_alive", states) def grow_topk(i, alive_seq, alive_log_probs, states): r"""Inner beam search loop. This function takes the current alive sequences, and grows them to topk sequences where k = 2*beam. We use 2*beam because we could have beam_size sequences that hit <EOS>, leaving no alive sequences to continue. With 2*beam_size, this will not happen. This relies on the assumption that the vocab size is > beam size. If this is true, we'll have at least beam_size non <EOS> extensions if we extract the next top 2*beam words. Length penalty is given by (5+len(decode)/6) ^ -\alpha. Please refer to https://arxiv.org/abs/1609.08144. Args: i: loop index alive_seq: Topk sequences decoded so far [batch_size, beam_size, i+1] alive_log_probs: probabilities of these sequences. [batch_size, beam_size] states: dict (possibly nested) of decoding states. 
Returns: Tuple of (Topk sequences extended by the next word, The log probs of these sequences, The scores with length penalty of these sequences, Flags indicating which of these sequences have finished decoding, dict of transformed decoding states) """ # Get the logits for all the possible next symbols flat_ids = tf.reshape(alive_seq, [batch_size * beam_size, -1]) # (batch_size * beam_size, decoded_length) if states: flat_states = nest.map_structure(_merge_beam_dim, states) flat_logits, flat_states = symbols_to_logits_fn(flat_ids, i, flat_states) states = nest.map_structure( lambda t: _unmerge_beam_dim(t, batch_size, beam_size), flat_states) else: flat_logits = symbols_to_logits_fn(flat_ids) logits = tf.reshape(flat_logits, [batch_size, beam_size, -1]) # Convert logits to normalized log probs candidate_log_probs = _log_prob_from_logits(logits) # Multiply the probabilities by the current probabilities of the beam. # (batch_size, beam_size, vocab_size) + (batch_size, beam_size, 1) log_probs = candidate_log_probs + tf.expand_dims(alive_log_probs, axis=2) length_penalty = tf.pow(((5. + tf.to_float(i + 1)) / 6.), alpha) curr_scores = log_probs / length_penalty # Flatten out (beam_size, vocab_size) probs into a list of possibilities flat_curr_scores = tf.reshape(curr_scores, [-1, beam_size * vocab_size]) topk_scores, topk_ids = tf.nn.top_k(flat_curr_scores, k=beam_size * 2) # Recovering the log probs because we will need to send them back topk_log_probs = topk_scores * length_penalty # Work out what beam the top probs are in. topk_beam_index = topk_ids // vocab_size topk_ids %= vocab_size # Unflatten the ids # The next three steps are to create coordinates for tf.gather_nd to pull # out the correct sequences from the ids that we need to grow. # We will also use the coordinates to gather the booleans of the beam items # that survived. batch_pos = compute_batch_indices(batch_size, beam_size * 2) # top beams will give us the actual coordinates to do the gather. # stacking will create a tensor of dimension batch * beam * 2, where the # last dimension contains the i,j gathering coordinates. topk_coordinates = tf.stack([batch_pos, topk_beam_index], axis=2) # Gather up the most probable 2*beams both for the ids and finished_in_alive # bools topk_seq = tf.gather_nd(alive_seq, topk_coordinates) if states: states = nest.map_structure( lambda state: tf.gather_nd(state, topk_coordinates), states) # Append the most probable alive topk_seq = tf.concat([topk_seq, tf.expand_dims(topk_ids, axis=2)], axis=2) topk_finished = tf.equal(topk_ids, eos_id) return topk_seq, topk_log_probs, topk_scores, topk_finished, states def inner_loop(i, alive_seq, alive_log_probs, finished_seq, finished_scores, finished_flags, states): """Inner beam search loop. There are three groups of tensors: alive, finished, and topk. The alive group contains information about the current alive sequences. The topk group contains information about alive + topk current decoded words. The finished group contains information about finished sentences, that is, the ones that have decoded to <EOS>. These are what we return. The general beam search algorithm is as follows: While we haven't terminated (please look at the termination condition) 1. Grow the current alive to get beam*2 topk sequences 2. Among the topk, keep the top beam_size ones that haven't reached EOS into alive 3. 
Among the topk, keep the top beam_size ones that have reached EOS into finished Repeat To make things simple when using fixed size tensors, we will end up inserting unfinished sequences into finished in the beginning. To stop that we add -ve INF to the score of the unfinished sequence so that when a true finished sequence does appear, it will have a higher score than all the unfinished ones. Args: i: loop index alive_seq: Topk sequences decoded so far [batch_size, beam_size, i+1] alive_log_probs: probabilities of the beams. [batch_size, beam_size] finished_seq: Current finished sequences. [batch_size, beam_size, i+1] finished_scores: scores for each of these sequences. [batch_size, beam_size] finished_flags: finished bools for each of these sequences. [batch_size, beam_size] states: dict (possibly nested) of decoding states. Returns: Tuple of (Incremented loop index New alive sequences, Log probs of the alive sequences, New finished sequences, Scores of the new finished sequences, Flags indicating which sequence in finished has reached EOS, dict of final decoding states) """ # Each inner loop, we carry out three steps: # 1. Get the current topk items. # 2. Separate the ones that have finished from the ones that haven't. # 3. Recompute the contents of finished based on scores. topk_seq, topk_log_probs, topk_scores, topk_finished, states = grow_topk( i, alive_seq, alive_log_probs, states) alive_seq, alive_log_probs, _, states = grow_alive( topk_seq, topk_scores, topk_log_probs, topk_finished, states) finished_seq, finished_scores, finished_flags, _ = grow_finished( finished_seq, finished_scores, finished_flags, topk_seq, topk_scores, topk_finished) return (i + 1, alive_seq, alive_log_probs, finished_seq, finished_scores, finished_flags, states) def _is_finished(i, unused_alive_seq, alive_log_probs, unused_finished_seq, finished_scores, finished_in_finished, unused_states): """Checking termination condition. We terminate when we have decoded up to decode_length or the lowest scoring item in finished has a greater score than the highest prob item in alive divided by the max length penalty Args: i: loop index alive_log_probs: probabilities of the beams. [batch_size, beam_size] finished_scores: scores for each of these sequences. [batch_size, beam_size] finished_in_finished: finished bools for each of these sequences. [batch_size, beam_size] Returns: Bool. """ if not stop_early: return tf.less(i, decode_length) max_length_penalty = tf.pow(((5. + tf.to_float(decode_length)) / 6.), alpha) # The best possible score of the most likely alive sequence. lower_bound_alive_scores = alive_log_probs[:, 0] / max_length_penalty # Now to compute the lowest score of a finished sequence in finished # If the sequence isn't finished, we multiply its score by 0. Since # scores are all -ve, taking the min will give us the score of the lowest # finished item. lowest_score_of_finished_in_finished = tf.reduce_min( finished_scores * tf.to_float(finished_in_finished), axis=1) # If none of the sequences have finished, then the min will be 0 and # we have to replace it by -ve INF if it is. The score of any seq in alive # will be much higher than -ve INF and the termination condition will not # be met. lowest_score_of_finished_in_finished += ( (1. 
- tf.to_float(tf.reduce_any(finished_in_finished, 1))) * -INF) bound_is_met = tf.reduce_all( tf.greater(lowest_score_of_finished_in_finished, lower_bound_alive_scores)) return tf.logical_and( tf.less(i, decode_length), tf.logical_not(bound_is_met)) (_, alive_seq, alive_log_probs, finished_seq, finished_scores, finished_flags, states) = tf.while_loop( _is_finished, inner_loop, [ tf.constant(0), alive_seq, alive_log_probs, finished_seq, finished_scores, finished_flags, states ], shape_invariants=[ tf.TensorShape([]), tf.TensorShape([None, None, None]), alive_log_probs.get_shape(), tf.TensorShape([None, None, None]), finished_scores.get_shape(), finished_flags.get_shape(), nest.map_structure(get_state_shape_invariants, states), ], parallel_iterations=1, back_prop=False) alive_seq.set_shape((None, beam_size, None)) finished_seq.set_shape((None, beam_size, None)) # Accounting for corner case: It's possible that no sequence in alive for a # particular batch item ever reached EOS. In that case, we should just copy # the contents of alive for that batch item. tf.reduce_any(finished_flags, 1) # if 0, means that no sequence for that batch index had reached EOS. We need # to do the same for the scores as well. finished_seq = tf.where( tf.reduce_any(finished_flags, 1), finished_seq, alive_seq) finished_scores = tf.where( tf.reduce_any(finished_flags, 1), finished_scores, alive_log_probs) if return_states: return finished_seq, finished_scores, states return finished_seq, finished_scores
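A hedged usage sketch of `beam_search` (TF1-style to match this codebase; the toy `symbols_to_logits_fn` and the shapes in the comments are illustrative assumptions, not taken from the source):

import tensorflow as tf

vocab_size = 5

def symbols_to_logits_fn(ids):
    # ids: [batch_size * beam_size, decoded_length]; strongly favor the
    # symbol (last_id + 1) % vocab_size so the search has a clear winner.
    last = ids[:, -1]
    return 10.0 * tf.one_hot((last + 1) % vocab_size, vocab_size)

initial_ids = tf.zeros([2], dtype=tf.int32)  # batch of 2, start from id 0
seqs, scores = beam_search(
    symbols_to_logits_fn, initial_ids, beam_size=3,
    decode_length=4, vocab_size=vocab_size, alpha=0.6)
# seqs: [2, 3, decode_length + 1] ids, scores: [2, 3] log probs,
# to be evaluated inside a tf.Session in this TF1-style code.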
def rcnn_proposals(proposals, bbox_pred, cls_prob, im_shape, num_classes, min_prob_threshold=0.0, class_max_detections=100): """ Args: proposals: Tensor with the RPN proposals bounding boxes. Shape (num_proposals, 4), where num_proposals is less than POST_NMS_TOP_N (we don't know the exact number beforehand). bbox_pred: Tensor with the RCNN delta predictions for each proposal for each class. Shape (num_proposals, 4 * num_classes) cls_prob: A softmax probability for each proposal where the idx = 0 is the background class (which we should ignore). Shape (num_proposals, num_classes + 1) Returns: objects: Shape (final_num_proposals, 4), where final_num_proposals is unknown beforehand (it depends on NMS). The 4-length Tensor for each corresponds to: (x_min, y_min, x_max, y_max). objects_label: Shape (final_num_proposals,) objects_label_prob: Shape (final_num_proposals,) """ selected_boxes = [] selected_probs = [] selected_labels = [] TARGET_VARIANCES = np.array([0.1, 0.1, 0.2, 0.2]) # For each class, take the proposals with the class-specific # predictions (class scores and bbox regression) and filter accordingly # (valid area, min probability score and NMS). for class_id in range(num_classes): # Apply the class-specific transformations to the proposals to # obtain the current class' prediction. class_prob = cls_prob[:, class_id + 1] # 0 is background class. class_bboxes = bbox_pred[:, (4 * class_id):(4 * class_id + 4)] raw_class_objects = decode( proposals, class_bboxes * TARGET_VARIANCES, ) # Clip bboxes so they don't go out of the image. class_objects = clip_boxes(raw_class_objects, im_shape) # Filter objects based on the min probability threshold and on them # having a valid area. prob_filter = tf.greater_equal(class_prob, min_prob_threshold) (x_min, y_min, x_max, y_max) = tf.unstack(class_objects, axis=1) area_filter = tf.greater( tf.maximum(x_max - x_min, 0.0) * tf.maximum(y_max - y_min, 0.0), 0.0) object_filter = tf.logical_and(area_filter, prob_filter) class_objects = tf.boolean_mask(class_objects, object_filter) class_prob = tf.boolean_mask(class_prob, object_filter) # We have to use TensorFlow's bounding box convention to use # the included function for NMS. class_objects_tf = change_order(class_objects) # Apply class NMS. class_selected_idx = tf.image.non_max_suppression( class_objects_tf, class_prob, class_max_detections, iou_threshold=CLASS_NMS_THRESHOLD) # Using NMS resulting indices, gather values from Tensors. class_objects_tf = tf.gather(class_objects_tf, class_selected_idx) class_prob = tf.gather(class_prob, class_selected_idx) # Revert to our bbox convention. class_objects = change_order(class_objects_tf) # We append values to a regular list which will later be # transformed to a proper Tensor. selected_boxes.append(class_objects) selected_probs.append(class_prob) # Since we are looping over classes, class_id is already fixed; we # use `tf.tile` to repeat it once per index returned by the NMS. selected_labels.append( tf.tile([class_id], [tf.shape(class_selected_idx)[0]])) # We use concat (axis=0) to generate a Tensor where the rows are # stacked on top of each other. objects = tf.concat(selected_boxes, axis=0) proposal_label = tf.concat(selected_labels, axis=0) proposal_label_prob = tf.concat(selected_probs, axis=0) # Get top-k detections of all classes. 
k = tf.minimum(TOTAL_MAX_DETECTIONS, tf.shape(proposal_label_prob)[0]) top_k = tf.nn.top_k(proposal_label_prob, k=k) top_k_proposal_label_prob = top_k.values top_k_objects = tf.gather(objects, top_k.indices) top_k_proposal_label = tf.gather(proposal_label, top_k.indices) return ( top_k_objects, top_k_proposal_label, top_k_proposal_label_prob, )
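The final top-k step above relies on gathering with one shared index set so that boxes, labels, and probabilities stay aligned. A minimal NumPy analogue (illustration only, with TOTAL_MAX_DETECTIONS stood in by 3):

# Sort by probability once, then gather every per-detection tensor with the
# same index order so rows stay aligned across boxes, labels, and scores.
import numpy as np

probs = np.array([0.2, 0.9, 0.5, 0.7])
labels = np.array([0, 2, 1, 2])
boxes = np.arange(16).reshape(4, 4)

k = min(3, probs.shape[0])                   # stand-in for TOTAL_MAX_DETECTIONS
order = np.argsort(-probs)[:k]               # indices of the top-k scores
top_boxes, top_labels, top_probs = boxes[order], labels[order], probs[order]
assert top_probs.tolist() == [0.9, 0.7, 0.5]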
def _remove_dilations(self): """ This method removes the dilations by extracting the values from the input for every sliding window according to the dilations, strides and kernel size, and generates output that can be used by pooling operations with strides = kernel_shape to accomplish dilated pooling. Example: Input: [[ 0, 1, 2, 3], [ 4, 5, 6, 7], [ 8, 9, 10, 11], [ 12, 13, 14, 15]] Kernel: [2, 2] Dilations: [2, 2] Strides: [1, 1] Will return: [[ 0, 2, 1, 3], [ 8, 10, 9, 11], [ 4, 6, 5, 7], [ 12, 14, 13, 15]] After max_pool2d with kernel_shape = strides = [2, 2] the result is: [[ 10, 11], [ 14, 15]] """ input_shape = tf_shape(self.input) in_spatial_shape = input_shape[1:self.spatial_size + 1] channels_count = input_shape[self.spatial_size + 1] # Initialize gather_ind with the range of channels # e.g. [0 1] gather_ind = tf.range(channels_count, dtype=tf.int64) # convert the vector to column vector # in the following logic we use column vectors gather_ind = tf.expand_dims(gather_ind, 1) # initialize the output_shape with zeros # self.output_shape will contain the shape of the # output tensor after the loop below is executed self.output_shape = [0] * (self.spatial_size + 2) self.output_shape[0] = input_shape[0] """ Loop over the input spatial dimensions starting from the last (most internal) going up to the first dimension. On every step of the loop calculate the output indices and map them to the input indices using `_calc_input_ind`, then "combine" with the already calculated indices from the previous dimensions using cartesian product. For the following example input: Input: [[ 0, 1, 2, 3], [ 4, 5, 6, 7], [ 8, 9, 10, 11], [ 12, 13, 14, 15]] Kernel: [2, 2] Dilations: [2, 2] Strides: [1, 1] these are the steps that will be executed: 1. Initialize gather_ind = [[0]] # we have only 1 channel 2. Loop step 0 (axis 1): filter_size = 3 output_size = 4 dim_ind = [[0] [2] [1] [3]] gather_ind = [[0 0] [2 0] [1 0] [3 0]] 3. Loop step 1 (axis 0): filter_size = 3 output_size = 4 dim_ind = [[0] [2] [1] [3]] gather_ind = [[0 0 0] [0 2 0] [0 1 0] [0 3 0] [2 0 0] [2 2 0] [2 1 0] [2 3 0] [1 0 0] [1 2 0] [1 1 0] [1 3 0] [3 0 0] [3 2 0] [3 1 0] [3 3 0]] These are the indices used for the gather_nd operation to collect the values from the input data. """ for dim in range(self.spatial_size - 1, -1, -1): filter_size = (self.kernel_shape[dim] - 1) * \ self.dilations[dim] + 1 output_size = (( (in_spatial_shape[dim] - filter_size) // self.strides[dim]) + 1 ) * self.kernel_shape[dim] self.output_shape[dim + 1] = output_size # initialize the output dimension index with the range of the # dimension output size (e.g. 4): [0, 1, 2, 3] dim_ind = tf.range(output_size) # calculate the matching indices in the input data # [0, 1, 2, 3] will calculate to [0, 2, 1, 3] # from the above example dim_ind = self._calc_input_ind(dim_ind, self.kernel_shape[dim], self.dilations[dim], self.strides[dim]) # convert to column vector dim_ind = tf.expand_dims(dim_ind, 1) # "combine" current dimension indices with the previous dimensions # using cartesian product gather_ind = tf_product(dim_ind, gather_ind) # The result from the above loop for 2D data will be: # [[y1, x1, c], [y2, x2, c], ..., [yn, xm, c]] where n is the height, # m is the width and c is the channel number. 
# set the channels count in the output_shape self.output_shape[self.spatial_size + 1] = channels_count # expand the dimensions to match the input dimensions + 1 for x in range(self.spatial_size): gather_ind = tf.expand_dims(gather_ind, 0) # duplicate the indices for every batch gather_ind = tf.tile(gather_ind, [input_shape[0]] + [1] * (self.spatial_size + 1)) # extract the selected values from the input output = tf.gather_nd(self.input, gather_ind, batch_dims=1) # reshape the output to the correct shape calculated earlier output = tf.reshape(output, self.output_shape) return output
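To see the whole `_remove_dilations` transformation end to end, the docstring example can be re-created with plain NumPy gathers; after regrouping, an ordinary stride-2 max pool reproduces dilated max pooling (illustration only):

# Gathering rows and columns in the order [0, 2, 1, 3] regroups a dilation-2
# input so that a plain stride-2, kernel-2 max pool equals dilated pooling.
import numpy as np

x = np.arange(16).reshape(4, 4)
ind = np.array([0, 2, 1, 3])    # what _calc_input_ind yields for k=2, d=2, s=1
regrouped = x[ind][:, ind]
assert regrouped.tolist() == [[0, 2, 1, 3], [8, 10, 9, 11],
                              [4, 6, 5, 7], [12, 14, 13, 15]]

# stride-2, kernel-2 max pool over the regrouped tensor:
pooled = regrouped.reshape(2, 2, 2, 2).max(axis=(1, 3))
assert pooled.tolist() == [[10, 11], [14, 15]]  # matches the docstring result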