def testEmptyOutputShape1(self):
    indices = tf.zeros([2, 2, 2], tf.int32)
    updates = tf.zeros([2, 2, 2], tf.int32)
    shape = tf.constant([0, 3, 2], tf.int32)

    with self.assertRaisesWithPredicateMatch(
            ValueError, "Indices and updates specified for empty output shape"):
        tf.scatter_nd(indices, updates, shape)
def testRank3InvalidShape2(self):
    indices = tf.zeros([2, 2, 1], tf.int32)
    updates = tf.zeros([2, 2], tf.int32)
    shape = np.array([2, 2, 2])
    with self.assertRaisesWithPredicateMatch(
            ValueError, "The inner \\d+ dimensions of output\\.shape="):
        tf.scatter_nd(indices, updates, shape)

    ref = tf.Variable(tf.zeros(shape, tf.int32))
    with self.assertRaisesWithPredicateMatch(
            ValueError, "The inner \\d+ dimensions of ref\\.shape="):
        tf.scatter_nd_update(ref, indices, updates)
def testEmptyOutputShape2(self):
    indices = tf.placeholder(tf.int32, shape=None)
    updates = tf.placeholder(tf.int32, shape=None)
    shape = tf.constant([0, 3, 2], tf.int32)

    with self.test_session():
        tf.scatter_nd(indices, updates, shape).eval(feed_dict={
            indices: np.zeros([2, 2, 2], dtype=np.int32),
            updates: np.zeros([2, 2, 2], dtype=np.int32)
        })
def hnet_loss(gt_pts, transformation_coeffcient, name):
    """
    :param gt_pts: original ground-truth label points [x, y, 1]
    :param transformation_coeffcient: homography parameters (6-parameter matrix)
                                      [[a, b, c], [0, d, e], [0, f, 1]]
    :param name:
    :return:
    """
    with tf.variable_scope(name):
        # First, project the original label points with the homography H.
        transformation_coeffcient = tf.concat([transformation_coeffcient, [1.0]], axis=-1)
        H_indices = tf.constant([[0], [1], [2], [4], [5], [7], [8]])
        H_shape = tf.constant([9])
        H = tf.scatter_nd(H_indices, transformation_coeffcient, H_shape)
        H = tf.reshape(H, shape=[3, 3])

        gt_pts = tf.transpose(gt_pts)
        pts_projects = tf.matmul(H, gt_pts)

        # Solve the least-squares polynomial-fit parameter matrix.
        Y = tf.transpose(pts_projects[1, :])
        X = tf.transpose(pts_projects[0, :])
        Y_One = tf.add(tf.subtract(Y, Y), tf.constant(1.0, tf.float32))
        Y_stack = tf.stack([tf.pow(Y, 3), tf.pow(Y, 2), Y, Y_One], axis=1)
        w = tf.matmul(tf.matmul(tf.matrix_inverse(tf.matmul(tf.transpose(Y_stack), Y_stack)),
                                tf.transpose(Y_stack)),
                      tf.expand_dims(X, -1))

        # Use the fitted polynomial parameters to compute the fitted positions,
        # back-project them to the original space, and compute the loss there.
        x_preds = tf.matmul(Y_stack, w)
        preds = tf.transpose(tf.stack([tf.squeeze(x_preds, -1), Y, Y_One], axis=1))
        x_transformation_back = tf.matmul(tf.matrix_inverse(H), preds)

        loss = tf.reduce_mean(tf.pow(gt_pts[0, :] - x_transformation_back[0, :], 2))

    return loss
def hnet_transformation(gt_pts, transformation_coeffcient, name):
    """
    :param gt_pts:
    :param transformation_coeffcient:
    :param name:
    :return:
    """
    with tf.variable_scope(name):
        # First, project the original label points with the homography H.
        transformation_coeffcient = tf.concat([transformation_coeffcient, [1.0]], axis=-1)
        H_indices = tf.constant([[0], [1], [2], [4], [5], [7], [8]])
        H_shape = tf.constant([9])
        H = tf.scatter_nd(H_indices, transformation_coeffcient, H_shape)
        H = tf.reshape(H, shape=[3, 3])

        gt_pts = tf.transpose(gt_pts)
        pts_projects = tf.matmul(H, gt_pts)

        # Solve the least-squares polynomial-fit parameter matrix.
        Y = tf.transpose(pts_projects[1, :])
        X = tf.transpose(pts_projects[0, :])
        Y_One = tf.add(tf.subtract(Y, Y), tf.constant(1.0, tf.float32))
        Y_stack = tf.stack([tf.pow(Y, 3), tf.pow(Y, 2), Y, Y_One], axis=1)
        w = tf.matmul(tf.matmul(tf.matrix_inverse(tf.matmul(tf.transpose(Y_stack), Y_stack)),
                                tf.transpose(Y_stack)),
                      tf.expand_dims(X, -1))

        # Use the fitted polynomial parameters to compute the fitted positions.
        x_preds = tf.matmul(Y_stack, w)
        preds = tf.transpose(tf.stack([tf.squeeze(x_preds, -1), Y, Y_One], axis=1))
        preds_fit = tf.stack([tf.squeeze(x_preds, -1), Y], axis=1)
        x_transformation_back = tf.matmul(tf.matrix_inverse(H), preds)

    return x_transformation_back
def unpool_layer2x2_batch(self, bottom, argmax):
    bottom_shape = tf.shape(bottom)
    top_shape = [bottom_shape[0], bottom_shape[1] * 2, bottom_shape[2] * 2, bottom_shape[3]]

    batch_size = top_shape[0]
    height = top_shape[1]
    width = top_shape[2]
    channels = top_shape[3]

    argmax_shape = tf.to_int64([batch_size, height, width, channels])
    argmax = self.unravel_argmax(argmax, argmax_shape)

    t1 = tf.to_int64(tf.range(channels))
    t1 = tf.tile(t1, [batch_size * (width // 2) * (height // 2)])
    t1 = tf.reshape(t1, [-1, channels])
    t1 = tf.transpose(t1, perm=[1, 0])
    t1 = tf.reshape(t1, [channels, batch_size, height // 2, width // 2, 1])
    t1 = tf.transpose(t1, perm=[1, 0, 2, 3, 4])

    t2 = tf.to_int64(tf.range(batch_size))
    t2 = tf.tile(t2, [channels * (width // 2) * (height // 2)])
    t2 = tf.reshape(t2, [-1, batch_size])
    t2 = tf.transpose(t2, perm=[1, 0])
    t2 = tf.reshape(t2, [batch_size, channels, height // 2, width // 2, 1])

    t3 = tf.transpose(argmax, perm=[1, 4, 2, 3, 0])

    # tf.concat(4, [...]) is the pre-TF-1.0 argument order; (values, axis) on TF >= 1.0.
    t = tf.concat([t2, t3, t1], 4)
    indices = tf.reshape(t, [(height // 2) * (width // 2) * channels * batch_size, 4])

    x1 = tf.transpose(bottom, perm=[0, 3, 1, 2])
    values = tf.reshape(x1, [-1])
    return tf.scatter_nd(indices, values, tf.to_int64(top_shape))
def update_slices(slices, indices, dense_tensor, head_dims):
    """Reconstitutes a tensor from slices and corresponding indices.

    Like _stack_tensor, but instead of setting missing slices to 0, sets them
    to what they were in the original tensor. The return value is reshaped to
    be the same as dense_tensor.

    Args:
      slices: a tensor. Shape [K, D_1, ...]
      indices: a 1-D integer tensor. Shape: [K]
      dense_tensor: the original tensor the slices were taken from.
        Shape: [D_0, D_1, ...]
      head_dims: True dimensions of the dense_tensor's first dimension.

    Returns:
      Reconstituted tensor. Shape: [D_0, D_1, ...]
    """
    # NOTE(siege): This cast shouldn't be necessary.
    indices = tf.cast(indices, tf.int32)

    tail_dims = tf.shape(dense_tensor)[1:]
    dense_shape = tf.concat([head_dims, tail_dims], 0)

    update_mask_vals = tf.fill(tf.shape(indices), 1)
    reshaped_indices = tf.expand_dims(indices, -1)
    update_mask = tf.equal(
        tf.scatter_nd(reshaped_indices, update_mask_vals, head_dims[:1]), 1)

    reshaped_dense_slices = tf.reshape(
        stack_tensor(slices, indices, dense_tensor, head_dims), dense_shape)
    reshaped_dense_tensor = tf.reshape(dense_tensor, dense_shape)

    return tf.reshape(
        tf.where(update_mask, reshaped_dense_slices, reshaped_dense_tensor),
        tf.shape(dense_tensor))
def stack_tensor(slices, indices, dense_tensor, head_dims):
    """Reconstitutes a tensor from slices and corresponding indices.

    This is an inverse operation to slice_tensor. Missing slices are set to 0.

    Args:
      slices: a tensor. Shape [K, D_1, ...]
      indices: a 1-D integer tensor. Shape: [K]
      dense_tensor: the original tensor the slices were taken from.
        Shape: [D_0, D_1, ...]
      head_dims: True dimensions of the dense_tensor's first dimension.

    Returns:
      Reconstituted tensor. Shape: [D_0, D_1, ...]
    """
    # NOTE(siege): This cast shouldn't be necessary.
    indices = tf.cast(indices, tf.int32)

    tail_dims = tf.shape(dense_tensor)[1:]
    dense_shape = tf.concat([head_dims, tail_dims], 0)

    slices = tf.reshape(slices, tf.concat([[-1], dense_shape[1:]], 0))
    indices = tf.expand_dims(indices, -1)

    return tf.reshape(tf.scatter_nd(indices, slices, dense_shape),
                      tf.shape(dense_tensor))
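As a minimal illustration of the scatter step above (values invented for this sketch, not part of the original source): rows listed in `indices` receive the corresponding slices, and every row missing from `indices` stays 0.

import tensorflow as tf

slices = tf.constant([[1., 1.], [2., 2.]])   # K=2 slices, D_1=2
indices = tf.constant([[0], [2]])            # destination rows
dense = tf.scatter_nd(indices, slices, [3, 2])
# dense == [[1., 1.], [0., 0.], [2., 2.]] -- row 1 was "missing", so it is 0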
def compute_module(accum, module):
    mask = tf.equal(module, selection)
    reduced_mask = tf.reduce_any(mask, axis=-1)
    indices = tf.where(reduced_mask)
    affected_inp = tf.boolean_mask(inputs, reduced_mask)
    output = module_fnc(affected_inp, module)
    return accum + tf.scatter_nd(indices, output, tf.cast(output_shape, tf.int64))
def hard_negative_mining():
    bboxes_per_batch = tf.unstack(bboxes)
    classification_loss_per_batch = tf.unstack(classification_loss)
    num_positives_per_batch = tf.unstack(tf.reduce_sum(positives, axis=-1))
    neg_class_loss_per_batch = tf.unstack(neg_class_loss_all)

    neg_class_losses = []
    total_negatives = []

    for bboxes_per_image, classification_loss_per_image, num_positives_per_image, neg_class_loss_per_image in \
            zip(bboxes_per_batch, classification_loss_per_batch, num_positives_per_batch, neg_class_loss_per_batch):
        min_negatives_keep = tf.maximum(self.neg_pos_ratio * num_positives_per_image, 3)
        num_negatives_keep = tf.minimum(min_negatives_keep,
                                        tf.count_nonzero(neg_class_loss_per_image, dtype=tf.float32))

        indices = tf.image.non_max_suppression(bboxes_per_image, classification_loss_per_image,
                                               tf.to_int32(num_negatives_keep), iou_threshold=0.99)
        num_negatives = tf.size(indices)
        total_negatives.append(num_negatives)

        expanded_indexes = tf.expand_dims(indices, axis=1)  # shape: (num_negatives, 1)
        negatives_keep = tf.scatter_nd(expanded_indexes,
                                       updates=tf.ones_like(indices, dtype=tf.int32),
                                       shape=tf.shape(classification_loss_per_image))  # shape: (num_priors,)
        negatives_keep = tf.to_float(tf.reshape(negatives_keep, [num_priors]))  # shape: (batch_size, num_priors)
        neg_class_losses.append(tf.reduce_sum(classification_loss_per_image * negatives_keep, axis=-1))  # shape: (1,)

    return tf.stack(neg_class_losses), tf.reduce_sum(tf.stack(total_negatives))
def call(self, x, padding=None):
    # Retrieve dynamically known shapes
    batch_size = tf.shape(x)[0]
    length = tf.shape(x)[1]

    if padding is not None:
        with tf.name_scope("remove_padding"):
            # Flatten padding to [batch_size*length]
            pad_mask = tf.reshape(padding, [-1])
            nonpad_ids = tf.to_int32(tf.where(pad_mask < 1e-9))

            # Reshape x to [batch_size*length, hidden_size] to remove padding
            x = tf.reshape(x, [-1, self.hidden_size])
            x = tf.gather_nd(x, indices=nonpad_ids)

            # Reshape x from 2 dimensions to 3 dimensions.
            x.set_shape([None, self.hidden_size])
            x = tf.expand_dims(x, axis=0)

    output = self.filter_dense_layer(x)
    if self.train:
        output = tf.nn.dropout(output, 1.0 - self.relu_dropout)
    output = self.output_dense_layer(output)

    if padding is not None:
        with tf.name_scope("re_add_padding"):
            output = tf.squeeze(output, axis=0)
            output = tf.scatter_nd(
                indices=nonpad_ids,
                updates=output,
                shape=[batch_size * length, self.hidden_size]
            )
            output = tf.reshape(output, [batch_size, length, self.hidden_size])
    return output
def testScatterNdRepatedIndicesAdd(self):
    indices = tf.zeros([100000, 1], tf.int32)
    values = np.random.randn(100000)
    shape = [1]
    with self.test_session():
        val = tf.scatter_nd(indices, values, shape).eval()
    self.assertAllClose([np.sum(values)], val)
def max_unpool(inputs, pooling_indices, output_shape=None, k_size=[1, 2, 2, 1]):
    # NOTE! this function is based on the implementation by kwotsin in
    # https://github.com/kwotsin/TensorFlow-ENet

    # inputs has shape [batch_size, height, width, channels]
    # pooling_indices: pooling indices of the previously max_pooled layer
    # output_shape: what shape the returned tensor should have

    pooling_indices = tf.cast(pooling_indices, tf.int32)
    input_shape = tf.shape(inputs, out_type=tf.int32)

    one_like_pooling_indices = tf.ones_like(pooling_indices, dtype=tf.int32)
    batch_shape = tf.concat([[input_shape[0]], [1], [1], [1]], 0)
    batch_range = tf.reshape(tf.range(input_shape[0], dtype=tf.int32), shape=batch_shape)
    b = one_like_pooling_indices * batch_range
    y = pooling_indices // (output_shape[2] * output_shape[3])
    x = (pooling_indices // output_shape[3]) % output_shape[2]
    feature_range = tf.range(output_shape[3], dtype=tf.int32)
    f = one_like_pooling_indices * feature_range

    inputs_size = tf.size(inputs)
    indices = tf.transpose(tf.reshape(tf.stack([b, y, x, f]), [4, inputs_size]))
    values = tf.reshape(inputs, [inputs_size])
    ret = tf.scatter_nd(indices, values, output_shape)

    return ret
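A hedged usage sketch (shapes invented): the `pooling_indices` this function expects are the flat `argmax` values produced by `tf.nn.max_pool_with_argmax`, assuming the argmax convention that does not include the batch index in the flattened offset.

import tensorflow as tf

x = tf.random_normal([2, 8, 8, 3])
pooled, argmax = tf.nn.max_pool_with_argmax(
    x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
unpooled = max_unpool(pooled, argmax, output_shape=[2, 8, 8, 3])
# unpooled has the original spatial size; positions that were not the
# argmax of their pooling window are 0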
def testEmptyOutputShape3(self):
    indices = tf.zeros([0], tf.int32)
    updates = tf.zeros([0], tf.int32)
    shape = tf.constant([0], tf.int32)
    scatter = tf.scatter_nd(indices, updates, shape)

    with self.test_session():
        self.assertEqual(scatter.eval().size, 0)
def _unsparsify(var_x):
    if not isinstance(var_x, tf.IndexedSlices):
        return var_x
    assert var_x.dense_shape is not None, \
        "memory_saving_gradients encountered sparse gradients of unknown shape"
    indices = var_x.indices
    while indices.shape.ndims < var_x.values.shape.ndims:
        indices = tf.expand_dims(indices, -1)
    return tf.scatter_nd(indices, var_x.values, var_x.dense_shape)
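A small, hypothetical round trip showing what this densification does (the values are illustrative, not from the original source):

import tensorflow as tf

sparse_grad = tf.IndexedSlices(values=tf.ones([2, 4]),
                               indices=tf.constant([1, 3]),
                               dense_shape=tf.constant([5, 4]))
dense_grad = _unsparsify(sparse_grad)
# dense_grad has shape [5, 4]; rows 1 and 3 are ones, all other rows are 0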
def testRank3ValidShape(self):
    indices = tf.zeros([2, 2, 2], tf.int32)
    updates = tf.zeros([2, 2, 2], tf.int32)
    shape = np.array([2, 2, 2])
    self.assertAllEqual(
        tf.scatter_nd(indices, updates, shape).get_shape().as_list(), shape)

    ref = tf.Variable(tf.zeros(shape, tf.int32))
    self.assertAllEqual(
        tf.scatter_nd_update(ref, indices, updates).get_shape().as_list(), shape)
def get_shuffle_ind(self, size):
    if self.shuffle_ind is None:
        # put the shuffle in tf memory to make the eval jobs
        # re-entrant.
        shuffle_ind_val = np.random.permutation(size)
        shuffle_ind = tf.get_variable(
            name='shuffle_ind', dtype=tf.int64, initializer=shuffle_ind_val)
        unshuffle_ind = tf.scatter_nd(
            tf.reshape(shuffle_ind, [-1, 1]), tf.range(size), [size])

    return shuffle_ind, unshuffle_ind
def testGradientsRank2SliceUpdate(self):
    indices = tf.constant([[1], [0]], dtype=tf.int32)
    updates = tf.constant([[3, 4], [1, 2]], dtype=tf.float64)
    shape = tf.constant([2, 2], dtype=tf.int32)
    outputs = tf.scatter_nd(indices, updates, shape)

    grad_vals = tf.constant([[3, 4], [1, 2]], dtype=tf.float64)
    grads = tf.gradients([outputs], [updates], [grad_vals])[0]
    expected_grads = np.array([[1, 2], [3, 4]], dtype=np.float64)
    with self.test_session():
        self.assertAllEqual(expected_grads, grads.eval())
def set_final(sequence, sequence_length, values, time_major=False):
    """Sets the final values in a batch of sequences, and clears those after."""
    sequence_batch_major = (
        sequence if not time_major else tf.transpose(sequence, [1, 0, 2]))
    final_index = _get_final_index(sequence_length, time_major=False)
    mask = tf.sequence_mask(
        tf.maximum(0, sequence_length - 1),
        maxlen=sequence_batch_major.shape[1],
        dtype=tf.float32)
    sequence_batch_major = (
        tf.expand_dims(mask, axis=-1) * sequence_batch_major +
        tf.scatter_nd(final_index, values, tf.shape(sequence_batch_major)))
    return (sequence_batch_major if not time_major
            else tf.transpose(sequence_batch_major, [1, 0, 2]))
def call(self, x, padding=None): """Return outputs of the feedforward network. Args: x: tensor with shape [batch_size, length, hidden_size] padding: (optional) If set, the padding values are temporarily removed from x (provided self.allow_pad is set). The padding values are placed back in the output tensor in the same locations. shape [batch_size, length] Returns: Output of the feedforward network. tensor with shape [batch_size, length, hidden_size] """ padding = None if not self.allow_pad else padding # Retrieve dynamically known shapes batch_size = tf.shape(x)[0] length = tf.shape(x)[1] if padding is not None: with tf.name_scope("remove_padding"): # Flatten padding to [batch_size*length] pad_mask = tf.reshape(padding, [-1]) nonpad_ids = tf.to_int32(tf.where(pad_mask < 1e-9)) # Reshape x to [batch_size*length, hidden_size] to remove padding x = tf.reshape(x, [-1, self.hidden_size]) x = tf.gather_nd(x, indices=nonpad_ids) # Reshape x from 2 dimensions to 3 dimensions. x.set_shape([None, self.hidden_size]) x = tf.expand_dims(x, axis=0) output = self.filter_dense_layer(x) if self.train: output = tf.nn.dropout(output, 1.0 - self.relu_dropout) output = self.output_dense_layer(output) if padding is not None: with tf.name_scope("re_add_padding"): output = tf.squeeze(output, axis=0) output = tf.scatter_nd( indices=nonpad_ids, updates=output, shape=[batch_size * length, self.hidden_size] ) output = tf.reshape(output, [batch_size, length, self.hidden_size]) return output
def __call__(self, shape, dtype=None, partition_info=None):
    del partition_info  # unused
    assert len(shape) > 2, shape

    support = tuple(shape[:-2]) + (1, 1)
    indices = [[s // 2 for s in support]]
    updates = tf.constant([self.gain], dtype=dtype)
    kernel = tf.scatter_nd(indices, updates, support)

    assert shape[-2] == shape[-1], shape
    if shape[-1] != 1:
        kernel *= tf.eye(shape[-1], dtype=dtype)
    return kernel
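This initializer builds a delta (identity) convolution kernel: `scatter_nd` writes `gain` at the spatial center of an otherwise all-zero kernel. A minimal sketch of what it produces for a 3x3 single-channel kernel (the gain value of 1.0 is a hypothetical choice):

import tensorflow as tf

# support == (3, 3, 1, 1), indices == [[1, 1, 0, 0]], so the kernel is zero
# everywhere except kernel[1, 1, 0, 0] == gain.
kernel = tf.scatter_nd([[1, 1, 0, 0]], tf.constant([1.0]), (3, 3, 1, 1))
# Convolving with this kernel leaves the input unchanged (an identity conv).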
def compute(bse):
    batch = bse[0]
    start = bse[1]
    end = bse[2]
    size = end - start
    seg_ended = input[batch, start:end, 1]
    idx = tf.where(tf.not_equal(seg_ended, 0))
    length_dist = tf.scatter_nd(idx, end_distribution[:tf.shape(idx)[0]], (window,))
    length_dist += no_label_backup[window - size:2 * window - size]
    length_dist = length_dist / tf.reduce_sum(length_dist)
    length_dist = tf.expand_dims(length_dist, -1)
    result = onehot[batch, start:start + window, :] * length_dist
    return result
def compute(bse):
    batch = bse[0]
    start = bse[1]
    end = bse[2]
    batch_cls = classes[batch][start:end]
    cls_not_eq = tf.not_equal(batch_cls[:-1], batch_cls[1:])
    cls_changed = tf.concat([cls_not_eq, [True]], axis=0)
    idx = tf.where(cls_changed)
    count = tf.squeeze(tf.concat([[idx[0] + 1], idx[1:] - idx[:-1]], axis=0), axis=1)
    freq = tf.cast(count, dtype='float32')
    res = tf.scatter_nd(idx,
                        tf.cast(count, dtype='float32') / tf.cast(end - start, dtype='float32'),
                        (window,))
    return res
def f2():
    # Now we'll identify the top-k (where k == `n_negative_keep`) boxes with the highest
    # confidence loss that belong to the background class in the ground truth data. Note
    # that this doesn't necessarily mean that the model predicted the wrong class for
    # those boxes, it just means that the loss for those boxes is the highest.

    # To do this, we reshape `neg_class_loss_all` to 1D...
    neg_class_loss_all_1D = tf.reshape(neg_class_loss_all, [-1])  # Tensor of shape (batch_size * n_boxes,)
    # ...and then we get the indices for the `n_negative_keep` boxes with the highest loss out of those...
    values, indices = tf.nn.top_k(neg_class_loss_all_1D, n_negative_keep, False)  # We don't need sorting
    # ...and with these indices we'll create a mask...
    negatives_keep = tf.scatter_nd(tf.expand_dims(indices, axis=1),
                                   updates=tf.ones_like(indices, dtype=tf.int32),
                                   shape=tf.shape(neg_class_loss_all_1D))  # Tensor of shape (batch_size * n_boxes,)
    negatives_keep = tf.to_float(tf.reshape(negatives_keep, [batch_size, n_boxes]))  # Tensor of shape (batch_size, n_boxes)
    # ...and use it to keep only those boxes and mask all other classification losses
    neg_class_loss = tf.reduce_sum(classification_loss * negatives_keep, axis=-1)  # Tensor of shape (batch_size,)
    return neg_class_loss
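A hedged mini-demo of the top-k plus scatter masking pattern used above (toy values, k=2):

import tensorflow as tf

losses = tf.constant([0.1, 0.9, 0.5, 0.7])
_, idx = tf.nn.top_k(losses, 2, False)
mask = tf.scatter_nd(tf.expand_dims(idx, 1), tf.ones_like(idx), tf.shape(losses))
# mask == [0, 1, 0, 1] -- keeps exactly the two highest-loss entries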
def testExtraIndicesDimensions(self):
    indices = tf.zeros([1, 1, 2], tf.int32)
    updates = tf.zeros([1, 1], tf.int32)
    shape = np.array([2, 2])
    scatter = tf.scatter_nd(indices, updates, shape)
    self.assertAllEqual(scatter.get_shape().as_list(), shape)
    expected_result = np.zeros([2, 2], dtype=np.int32)
    with self.test_session():
        self.assertAllEqual(expected_result, scatter.eval())

    ref = tf.Variable(tf.zeros(shape, tf.int32))
    scatter_update = tf.scatter_nd_update(ref, indices, updates)
    self.assertAllEqual(scatter_update.get_shape().as_list(), shape)

    with self.test_session():
        ref.initializer.run()
        self.assertAllEqual(expected_result, scatter_update.eval())
def compute(bse):
    batch = bse[0]
    start = bse[1]
    end = bse[2]
    size = end - start
    cls_chg = cls_changed[batch][start:end]
    idx = tf.where(cls_chg)
    res = tf.scatter_nd(idx, end_distribution[:tf.shape(idx)[0]], (window,))
    if min_length > 1:
        res *= min_length_filter
    res += no_label_backup[window - size:2 * window - size]
    res = res / tf.reduce_sum(res)
    if smoothing is not None:
        res = tf.tensordot(res, smoothing.read(size - 1), [[0], [0]])
    return res
def restore(self, x):
    """Add padding back to the given tensor.

    Args:
      x (tf.Tensor): of shape [dim_compressed, ...]

    Returns:
      a tensor of shape [dim_origin, ...] with dim_compressed >= dim_origin. The
      dim is restored from the original reference tensor
    """
    with tf.name_scope("pad_reduce/restore"):
        x = tf.scatter_nd(
            indices=self.nonpad_ids,
            updates=x,
            shape=tf.concat([self.dim_origin, tf.shape(x)[1:]], axis=0),
        )
    return x
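A self-contained sketch of the gather/scatter round trip this method completes (illustrative values; `nonpad_ids` here plays the role of `self.nonpad_ids`):

import tensorflow as tf

flat_x = tf.constant([[1.], [0.], [2.], [0.]])         # zero rows stand in for padding
nonpad_ids = tf.where(tf.reduce_sum(flat_x, -1) > 0.)  # [[0], [2]]
compressed = tf.gather_nd(flat_x, nonpad_ids)          # padding removed
restored = tf.scatter_nd(nonpad_ids, compressed,
                         tf.shape(flat_x, out_type=tf.int64))
# restored == flat_x: zeros are re-inserted at the padded positions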
def combine_loss_val(embedding, labels, w_init, out_num, margin_a, margin_m, margin_b, s):
    '''
    This code is contributed by RogerLo. Thanks for your contribution.

    :param embedding: the input embedding vectors
    :param labels: the input labels, the shape should be e.g. (batch_size, 1)
    :param s: scalar value, default is 64
    :param out_num: output class num
    :param m: the margin value, default is 0.5
    :return: the final calculated output; this output is sent into tf.nn.softmax directly
    '''
    weights = tf.get_variable(name='embedding_weights',
                              shape=(embedding.get_shape().as_list()[-1], out_num),
                              initializer=w_init, dtype=tf.float32)
    weights_unit = tf.nn.l2_normalize(weights, axis=0)
    embedding_unit = tf.nn.l2_normalize(embedding, axis=1)
    cos_t = tf.matmul(embedding_unit, weights_unit)
    ordinal = tf.constant(list(range(0, embedding.get_shape().as_list()[0])), tf.int64)
    ordinal_y = tf.stack([ordinal, labels], axis=1)
    zy = cos_t * s
    sel_cos_t = tf.gather_nd(zy, ordinal_y)
    if margin_a != 1.0 or margin_m != 0.0 or margin_b != 0.0:
        if margin_a == 1.0 and margin_m == 0.0:
            s_m = s * margin_b
            new_zy = sel_cos_t - s_m
        else:
            cos_value = sel_cos_t / s
            t = tf.acos(cos_value)
            if margin_a != 1.0:
                t = t * margin_a
            if margin_m > 0.0:
                t = t + margin_m
            body = tf.cos(t)
            if margin_b > 0.0:
                body = body - margin_b
            new_zy = body * s
    updated_logits = tf.add(zy, tf.scatter_nd(ordinal_y,
                                              tf.subtract(new_zy, sel_cos_t),
                                              zy.get_shape()))
    loss = tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels, logits=updated_logits))
    predict_cls = tf.argmax(updated_logits, 1)
    accuracy = tf.reduce_mean(
        tf.cast(tf.equal(tf.cast(predict_cls, tf.int64), tf.cast(labels, tf.int64)), 'float'))
    predict_cls_s = tf.argmax(zy, 1)
    accuracy_s = tf.reduce_mean(
        tf.cast(tf.equal(tf.cast(predict_cls_s, tf.int64), tf.cast(labels, tf.int64)), 'float'))
    return zy, loss, accuracy, accuracy_s, predict_cls_s
def build_step(self, signals):
    input = signals.gather(self.input_data)
    input = tf.reshape(input, (self.n_ops, -1))
    state = signals.gather(self.state_sig)

    # compute output
    if self.C is None:
        output = tf.zeros_like(input)
    else:
        output = state * self.C
        output = tf.reshape(
            output, (self.n_ops, -1, signals.minibatch_size * self.signal_d))
        output = tf.reduce_sum(output, axis=1)

    if self.D is not None:
        output += self.D * input

    signals.scatter(self.output_data, output)

    # update state
    if LooseVersion(tf.__version__) < LooseVersion("1.7.0"):
        mat_mul = gen_sparse_ops._sparse_tensor_dense_mat_mul
    else:
        mat_mul = gen_sparse_ops.sparse_tensor_dense_mat_mul
    r = mat_mul(self.A_indices, self.A, self.A_shape, state)

    with tf.control_dependencies([output]):
        state = r + tf.scatter_nd(self.offsets, input, self.state_sig.shape)

    # TODO: tensorflow does not yet support sparse_tensor_dense_add
    # on the GPU
    # state = gen_sparse_ops._sparse_tensor_dense_add(
    #     self.offsets, input, self.state_sig.shape, r)

    state.set_shape(self.state_sig.shape)

    signals.mark_gather(self.input_data)
    signals.mark_gather(self.state_sig)
    signals.scatter(self.state_sig, state)
def _myForwardPass(self):
    cnn_feats = self._ph.cnn_feats
    pred_polys = self._ph.pred_polys
    pred_mask_imgs = self._ph.pred_mask_imgs
    last_cell_state_1 = self._ph.cells_1[:, -1, :, :, :]
    last_cell_state_2 = self._ph.cells_2[:, -1, :, :, :]
    weight_decay = 0.00001
    predicted_history = tf.zeros(shape=(self.batch_size, 28, 28, 1))

    # Drawing the canvas
    for i in range(self.seq_len):
        pred_polys_t = pred_polys[:, i]  # batch x
        indices = tf.concat(
            [tf.reshape(tf.range(0, self.batch_size), (self.batch_size, 1)),
             tf.cast(pred_polys_t, tf.int32)], axis=1)
        updates = tf.ones(shape=self.batch_size)
        pred_polys_t = tf.scatter_nd(indices, updates, shape=(self.batch_size, 28, 28))
        predicted_history = predicted_history + tf.expand_dims(pred_polys_t, axis=-1)

    xt = tf.concat([cnn_feats, predicted_history, pred_mask_imgs,
                    last_cell_state_1, last_cell_state_2], axis=3)

    with slim.arg_scope([slim.conv2d],
                        kernel_size=[3, 3],
                        stride=1,
                        weights_regularizer=slim.l2_regularizer(weight_decay),
                        activation_fn=tf.nn.relu,
                        normalizer_fn=slim.batch_norm,
                        normalizer_params={"is_training": self.is_training,
                                           "decay": 0.99,
                                           "center": True,
                                           "scale": True},
                        weights_initializer=layers.variance_scaling_initializer(
                            factor=2.0, mode='FAN_IN', uniform=False)):
        self._conv1 = slim.conv2d(xt, scope="conv1", num_outputs=16)
        self._conv2 = slim.conv2d(self._conv1, scope="conv2", num_outputs=1)
        output = layers.fully_connected(slim.flatten(self._conv2), 1,
                                        weights_regularizer=layers.l2_regularizer(1e-5),
                                        scope="FC")
    return output
def train(train_data, test_data=None):
    G = train_data[0]
    features = train_data[1]
    id_map = train_data[2]

    if features is not None:
        # pad with dummy zero vector
        features = np.vstack([features, np.zeros((features.shape[1],))])

    context_pairs = train_data[3] if FLAGS.random_context else None
    placeholders = construct_placeholders()
    minibatch = EdgeMinibatchIterator(G,
                                      id_map,
                                      placeholders,
                                      batch_size=FLAGS.batch_size,
                                      max_degree=FLAGS.max_degree,
                                      num_neg_samples=FLAGS.neg_sample_size,
                                      context_pairs=context_pairs)
    adj_info_ph = tf.compat.v1.placeholder(tf.int32, shape=minibatch.adj.shape)
    adj_info = tf.Variable(adj_info_ph, trainable=False, name="adj_info")

    if FLAGS.model == 'graphsage_mean':
        # Create model
        sampler = UniformNeighborSampler(adj_info)
        layer_infos = [
            SAGEInfo("node", sampler, FLAGS.samples_1, FLAGS.dim_1),
            SAGEInfo("node", sampler, FLAGS.samples_2, FLAGS.dim_2)
        ]
        model = SampleAndAggregate(placeholders,
                                   features,
                                   adj_info,
                                   minibatch.deg,
                                   layer_infos=layer_infos,
                                   model_size=FLAGS.model_size,
                                   identity_dim=FLAGS.identity_dim,
                                   logging=True)
    elif FLAGS.model == 'gcn':
        # Create model
        sampler = UniformNeighborSampler(adj_info)
        layer_infos = [
            SAGEInfo("node", sampler, FLAGS.samples_1, 2 * FLAGS.dim_1),
            SAGEInfo("node", sampler, FLAGS.samples_2, 2 * FLAGS.dim_2)
        ]
        model = SampleAndAggregate(placeholders,
                                   features,
                                   adj_info,
                                   minibatch.deg,
                                   layer_infos=layer_infos,
                                   aggregator_type="gcn",
                                   model_size=FLAGS.model_size,
                                   identity_dim=FLAGS.identity_dim,
                                   concat=False,
                                   logging=True)
    elif FLAGS.model == 'graphsage_seq':
        sampler = UniformNeighborSampler(adj_info)
        layer_infos = [
            SAGEInfo("node", sampler, FLAGS.samples_1, FLAGS.dim_1),
            SAGEInfo("node", sampler, FLAGS.samples_2, FLAGS.dim_2)
        ]
        model = SampleAndAggregate(placeholders,
                                   features,
                                   adj_info,
                                   minibatch.deg,
                                   layer_infos=layer_infos,
                                   identity_dim=FLAGS.identity_dim,
                                   aggregator_type="seq",
                                   model_size=FLAGS.model_size,
                                   logging=True)
    elif FLAGS.model == 'graphsage_maxpool':
        sampler = UniformNeighborSampler(adj_info)
        layer_infos = [
            SAGEInfo("node", sampler, FLAGS.samples_1, FLAGS.dim_1),
            SAGEInfo("node", sampler, FLAGS.samples_2, FLAGS.dim_2)
        ]
        model = SampleAndAggregate(placeholders,
                                   features,
                                   adj_info,
                                   minibatch.deg,
                                   layer_infos=layer_infos,
                                   aggregator_type="maxpool",
                                   model_size=FLAGS.model_size,
                                   identity_dim=FLAGS.identity_dim,
                                   logging=True)
    elif FLAGS.model == 'graphsage_meanpool':
        sampler = UniformNeighborSampler(adj_info)
        layer_infos = [
            SAGEInfo("node", sampler, FLAGS.samples_1, FLAGS.dim_1),
            SAGEInfo("node", sampler, FLAGS.samples_2, FLAGS.dim_2)
        ]
        model = SampleAndAggregate(placeholders,
                                   features,
                                   adj_info,
                                   minibatch.deg,
                                   layer_infos=layer_infos,
                                   aggregator_type="meanpool",
                                   model_size=FLAGS.model_size,
                                   identity_dim=FLAGS.identity_dim,
                                   logging=True)
    elif FLAGS.model == 'n2v':
        model = Node2VecModel(
            placeholders,
            features.shape[0],
            minibatch.deg,
            # 2x because graphsage uses concat
            nodevec_dim=2 * FLAGS.dim_1,
            lr=FLAGS.learning_rate)
    else:
        raise Exception('Error: model name unrecognized.')

    config = tf.compat.v1.ConfigProto(log_device_placement=FLAGS.log_device_placement)
    config.gpu_options.allow_growth = True
    # config.gpu_options.per_process_gpu_memory_fraction = GPU_MEM_FRACTION
    config.allow_soft_placement = True

    # Initialize WandB experiment
    wandb.init(project='chengdu_GraphSAGE', save_code=True, tags=['unsupervised'])
    wandb.config.update(flags.FLAGS)

    # Initialize session
    sess = tf.compat.v1.Session(config=config)
    merged = tf.compat.v1.summary.merge_all()
    summary_writer = tf.compat.v1.summary.FileWriter(log_dir(), sess.graph)

    # Init variables
    sess.run(tf.compat.v1.global_variables_initializer(),
             feed_dict={adj_info_ph: minibatch.adj})

    # Init saver
    saver = tf.compat.v1.train.Saver(max_to_keep=8, keep_checkpoint_every_n_hours=1)

    # Train model
    train_shadow_mrr = None
    val_shadow_mrr = None

    total_steps = 0
    avg_time = 0.0
    epoch_val_costs = []

    train_adj_info = tf.compat.v1.assign(adj_info, minibatch.adj)
    val_adj_info = tf.compat.v1.assign(adj_info, minibatch.test_adj)
    for epoch in range(FLAGS.epochs):
        minibatch.shuffle()

        iter = 0
        print('Epoch: %04d' % (epoch + 1))
        epoch_val_costs.append(0)
        while not minibatch.end():
            # Construct feed dictionary
            feed_dict = minibatch.next_minibatch_feed_dict()
            feed_dict.update({placeholders['dropout']: FLAGS.dropout})

            t = time.time()
            # Training step
            outs = sess.run([
                merged, model.opt_op, model.loss, model.ranks, model.aff_all,
                model.mrr, model.outputs1
            ], feed_dict=feed_dict)
            train_cost = outs[2]
            train_mrr = outs[5]
            if train_shadow_mrr is None:
                train_shadow_mrr = train_mrr
            else:
                train_shadow_mrr -= (1 - 0.99) * (train_shadow_mrr - train_mrr)

            # Validation
            if iter % FLAGS.validate_iter == 0:
                sess.run(val_adj_info.op)
                val_cost, ranks, val_mrr, duration = evaluate(
                    sess, model, minibatch, size=FLAGS.validate_batch_size)
                sess.run(train_adj_info.op)
                epoch_val_costs[-1] += val_cost
                if val_shadow_mrr is None:
                    val_shadow_mrr = val_mrr
                else:
                    val_shadow_mrr -= (1 - 0.99) * (val_shadow_mrr - val_mrr)

            if total_steps % FLAGS.print_every == 0:
                summary_writer.add_summary(outs[0], total_steps)

            # Print results
            avg_time = (avg_time * total_steps + time.time() - t) / (total_steps + 1)

            if total_steps % FLAGS.print_every == 0:
                print(
                    "[%03d/%03d]" % (epoch + 1, FLAGS.epochs),
                    "Iter:", '%04d' % iter,
                    "train_loss =", "{:.5f}".format(train_cost),
                    "train_mrr =", "{:.5f}".format(train_mrr),
                    "train_mrr_ema =", "{:.5f}".format(train_shadow_mrr),  # exponential moving average
                    "val_loss =", "{:.5f}".format(val_cost),
                    "val_mrr =", "{:.5f}".format(val_mrr),
                    "val_mrr_ema =", "{:.5f}".format(val_shadow_mrr),  # exponential moving average
                    "time =", "{:.5f}".format(avg_time))

            # W&B Logging
            if FLAGS.wandb_log and iter % FLAGS.wandb_log_iter == 0:
                wandb.log({'train_loss': train_cost, 'epoch': epoch})
                wandb.log({'train_mrr': train_mrr, 'epoch': epoch})
                wandb.log({'train_mrr_ema': train_shadow_mrr, 'epoch': epoch})
                wandb.log({'val_loss': val_cost, 'epoch': epoch})
                wandb.log({'val_mrr': val_mrr, 'epoch': epoch})
                wandb.log({'val_mrr_ema': val_shadow_mrr, 'epoch': epoch})
                wandb.log({'time': avg_time, 'epoch': epoch})

            iter += 1
            total_steps += 1

            if total_steps > FLAGS.max_total_steps:
                print('Max total steps reached!')
                break

        # Save embeddings
        if FLAGS.save_embeddings and epoch % FLAGS.save_embeddings_epoch == 0:
            save_val_embeddings(sess, model, minibatch, FLAGS.validate_batch_size, log_dir())

            # # Also report classifier metric on the embedding
            # all_tr_res, all_ts_res = osm_evaluation.evaluate(FLAGS.train_prefix, log_dir, n_iter=FLAGS.classif_n_iter)
            # if FLAGS.wandb_log:
            #     wandb.log(all_tr_res)
            #     wandb.log(all_ts_res)

        # Save Model checkpoints
        if FLAGS.save_checkpoints and epoch % FLAGS.save_checkpoints_epoch == 0:
            # saver.save(sess, log_dir() + 'model', global_step=1000)
            print('Save model checkpoint:', wandb.run.dir, iter, total_steps, epoch)
            saver.save(sess,
                       os.path.join(wandb.run.dir, "model-" + str(epoch + 1) + ".ckpt"))

        if total_steps > FLAGS.max_total_steps:
            print('Max total steps reached!')
            break

    print("Optimization Finished!")
    if FLAGS.save_embeddings:
        sess.run(val_adj_info.op)
        save_val_embeddings(sess, model, minibatch, FLAGS.validate_batch_size, log_dir())

    if FLAGS.model == "n2v":
        # stopping the gradient for the already trained nodes
        train_ids = tf.constant(
            [[id_map[n]] for n in G.nodes_iter()
             if not G.node[n]['val'] and not G.node[n]['test']],
            dtype=tf.int32)
        test_ids = tf.constant([[id_map[n]] for n in G.nodes_iter()
                                if G.node[n]['val'] or G.node[n]['test']],
                               dtype=tf.int32)
        update_nodes = tf.nn.embedding_lookup(model.context_embeds, tf.squeeze(test_ids))
        no_update_nodes = tf.nn.embedding_lookup(model.context_embeds, tf.squeeze(train_ids))
        update_nodes = tf.scatter_nd(test_ids, update_nodes, tf.shape(model.context_embeds))
        no_update_nodes = tf.stop_gradient(
            tf.scatter_nd(train_ids, no_update_nodes, tf.shape(model.context_embeds)))
        model.context_embeds = update_nodes + no_update_nodes
        sess.run(model.context_embeds)

        # run random walks
        from graphsage.utils import run_random_walks
        nodes = [
            n for n in G.nodes_iter()
            if G.node[n]["val"] or G.node[n]["test"]
        ]
        start_time = time.time()
        pairs = run_random_walks(G, nodes, num_walks=50)
        walk_time = time.time() - start_time

        test_minibatch = EdgeMinibatchIterator(
            G,
            id_map,
            placeholders,
            batch_size=FLAGS.batch_size,
            max_degree=FLAGS.max_degree,
            num_neg_samples=FLAGS.neg_sample_size,
            context_pairs=pairs,
            n2v_retrain=True,
            fixed_n2v=True)

        start_time = time.time()
        print("Doing test training for n2v.")
        test_steps = 0
        for epoch in range(FLAGS.n2v_test_epochs):
            test_minibatch.shuffle()
            while not test_minibatch.end():
                feed_dict = test_minibatch.next_minibatch_feed_dict()
                feed_dict.update({placeholders['dropout']: FLAGS.dropout})
                outs = sess.run([
                    model.opt_op, model.loss, model.ranks, model.aff_all,
                    model.mrr, model.outputs1
                ], feed_dict=feed_dict)
                if test_steps % FLAGS.print_every == 0:
                    print("Iter:", '%04d' % test_steps,
                          "train_loss=", "{:.5f}".format(outs[1]),
                          "train_mrr=", "{:.5f}".format(outs[-2]))
                test_steps += 1
        train_time = time.time() - start_time
        save_val_embeddings(sess, model, minibatch, FLAGS.validate_batch_size,
                            log_dir(), mod="-test")
        print("Total time: ", train_time + walk_time)
        print("Walk time: ", walk_time)
        print("Train time: ", train_time)
def _scatter(indices, array, new_dimensions):
    indices = np.expand_dims(indices, 1)
    return tf.scatter_nd(indices, array, new_dimensions)
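A quick, hypothetical call to show the shapes involved: `indices` is a flat integer array that gets expanded to shape [K, 1] before scattering.

import numpy as np
import tensorflow as tf

result = _scatter(np.array([0, 3]), tf.constant([7., 9.]), [5])
# result == [7., 0., 0., 9., 0.]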
def word_distribution(decoder_logit_list, decoder_output_list, encoder_outputs, encoder_embedding,
                      sentence_complex_input_placeholder, obj_tensors,
                      model_config, data, segment_mask=None, is_test=False):
    if model_config.architecture == 'ut2t':
        # attn_dists = obj_tensors[
        #     'model/transformer_decoder/decoder/universal_transformer_act/encdec_attention/multihead_attention/dot_product_attention']
        # attn_dists = attn_dists[:, 0, :, :]
        raise ValueError('Cannot use copy in ut2t')
    else:
        attn_dists = obj_tensors[
            'model/transformer_decoder/decoder/layer_%s/encdec_attention/multihead_attention/dot_product_attention'
            % (model_config.num_decoder_layers - 1)]
        attn_dists_stack = attn_dists[:, 0, :, :]

    if is_test:
        attn_dists = [attn_dists_stack[:, -1, :]]
        attn_dists_stack = tf.expand_dims(attn_dists[0], axis=1)
    else:
        attn_dists = tf.unstack(attn_dists_stack, axis=1)

    sentence_complex_input = tf.stack(sentence_complex_input_placeholder, axis=1)
    ignore_token_idx = data.vocab_simple.encode(constant.SYMBOL_UNK)
    if type(ignore_token_idx) == list:
        assert len(ignore_token_idx) == 1
        ignore_token_idx = ignore_token_idx[0]
    if segment_mask is not None:
        sentence_complex_input *= segment_mask
        sentence_complex_input += tf.to_int32(
            tf.to_float(tf.equal(sentence_complex_input, 0)) * ignore_token_idx)

    batch_nums = tf.range(0, limit=model_config.batch_size)
    batch_nums = tf.expand_dims(batch_nums, 1)
    batch_nums = tf.tile(batch_nums, [1, model_config.max_complex_sentence])
    indices = tf.stack((batch_nums, sentence_complex_input), axis=2)
    attn_dists_projected = [
        tf.scatter_nd(indices, copy_dist,
                      [model_config.batch_size, data.vocab_simple.vocab_size()])
        for copy_dist in attn_dists
    ]
    for attn_id, attn_dist in enumerate(attn_dists_projected):
        mask = tf.concat([
            tf.ones([model_config.batch_size, ignore_token_idx]),
            tf.zeros([model_config.batch_size, 1]),
            tf.ones([
                model_config.batch_size,
                data.vocab_simple.vocab_size() - ignore_token_idx - 1
            ])
        ], axis=1)
        attn_dists_projected[attn_id] *= mask
    attn_dists_projected = tf.stack(attn_dists_projected, axis=1)
    attn_dists_projected = tf.stop_gradient(attn_dists_projected)

    decoder_logit = tf.stack(decoder_logit_list, axis=1)
    decoder_output = tf.stack(decoder_output_list, axis=1)
    context_vectors = tf.matmul(attn_dists_stack, encoder_outputs)
    context_emb_vectors = tf.matmul(attn_dists_stack, encoder_embedding)
    context_vectors = tf.stop_gradient(context_vectors)
    context_emb_vectors = tf.stop_gradient(context_emb_vectors)
    decoder_output = tf.stop_gradient(decoder_output)
    # decoder_logit = tf.stop_gradient(decoder_logit)
    evidence = tf.concat(
        [context_vectors, context_emb_vectors, decoder_output], axis=-1)
    gate = tf.layers.dense(evidence, 1, activation=tf.nn.sigmoid)

    if 'thres' in model_config.pointer_mode:
        output_logit = tf.cond(tf.greater_equal(gate, 0.5),
                               lambda: attn_dists_projected,
                               lambda: decoder_logit)
    elif 'fuse' in model_config.pointer_mode:
        output_logit = gate * attn_dists_projected + (1 - gate) * decoder_logit
    else:
        raise NotImplementedError('unknown output pointer')

    return tf.unstack(output_logit, axis=1)
def sphereloss(inputs, label, classes, batch_size, fraction=1, scope='Logits', reuse=None, m=4, eplion=1e-8):
    """
    inputs: tensor of shape [batch, features_num]
    label: tensor of shape [batch], each entry a class index in [0, classes)
    """
    inputs_shape = inputs.get_shape().as_list()
    with tf.variable_scope(name_or_scope=scope):
        weight = tf.Variable(
            initial_value=tf.random_normal((classes, inputs_shape[1])) * tf.sqrt(2 / inputs_shape[1]),
            dtype=tf.float32, name='weights')  # shape = [classes, features_num]
        print("weight shape = ", weight.get_shape().as_list())

        weight_unit = tf.nn.l2_normalize(weight, dim=1)
        print("weight_unit shape = ", weight_unit.get_shape().as_list())

        inputs_mo = tf.sqrt(tf.reduce_sum(tf.square(inputs), axis=1) + eplion)  # shape = [batch]
        print("inputs_mo shape = ", inputs_mo.get_shape().as_list())

        inputs_unit = tf.nn.l2_normalize(inputs, dim=1)  # shape = [batch, features_num]
        print("inputs_unit shape = ", inputs_unit.get_shape().as_list())

        logits = tf.matmul(inputs, tf.transpose(weight_unit))  # shape = [batch, classes], x * w_unit
        print("logits shape = ", logits.get_shape().as_list())

        weight_unit_batch = tf.gather(weight_unit, label)  # shape = [batch, features_num]
        print("weight_unit_batch shape = ", weight_unit_batch.get_shape().as_list())

        logits_inputs = tf.reduce_sum(tf.multiply(inputs, weight_unit_batch), axis=1)  # shape = [batch]
        print("logits_inputs shape = ", logits_inputs.get_shape().as_list())

        cos_theta = tf.reduce_sum(tf.multiply(inputs_unit, weight_unit_batch), axis=1)  # shape = [batch]
        print("cos_theta shape = ", cos_theta.get_shape().as_list())

        cos_theta_square = tf.square(cos_theta)
        cos_theta_biq = tf.pow(cos_theta, 4)
        sign0 = tf.sign(cos_theta)
        sign2 = tf.sign(2 * cos_theta_square - 1)
        sign3 = tf.multiply(sign2, sign0)
        sign4 = 2 * sign0 + sign3 - 3
        cos_far_theta = sign3 * (8 * cos_theta_biq - 8 * cos_theta_square + 1) + sign4
        print("cos_far_theta = ", cos_far_theta.get_shape().as_list())

        logit_ii = tf.multiply(cos_far_theta, inputs_mo)  # shape = [batch]
        print("logit_ii shape = ", logit_ii.get_shape().as_list())

        index_range = tf.range(start=0, limit=tf.shape(inputs, out_type=tf.int64)[0],
                               delta=1, dtype=tf.int64)
        index_labels = tf.stack([index_range, label], axis=1)
        index_logits = tf.scatter_nd(index_labels,
                                     tf.subtract(logit_ii, logits_inputs),
                                     tf.shape(logits, out_type=tf.int64))
        print("index_logits shape = ", logit_ii.get_shape().as_list())

        logits_final = tf.add(logits, index_logits)
        logits_final = fraction * logits_final + (1 - fraction) * logits

        loss = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(labels=label, logits=logits_final))

    return logits_final, loss
tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES, tf.nn.l2_loss(b_answer2))

# The regressed word. This isn't an actual word yet;
# we still have to find the closest match.
logit = tf.expand_dims(
    tf.matmul(tf.matmul(a0, w_answer1) + b_answer1, w_answer2) + b_answer2, 1)

# Make a mask over which words exist.
with tf.variable_scope("ending"):
    all_ends = tf.reshape(input_sentence_endings, [-1, 2])
    range_ends = tf.range(tf.shape(all_ends)[0])
    ends_indices = tf.stack([all_ends[:, 0], range_ends], axis=1)
    ind = tf.reduce_max(
        tf.scatter_nd(ends_indices, all_ends[:, 1],
                      [tf.shape(q)[0], tf.shape(all_ends)[0]]),
        axis=-1)
    range_ind = tf.range(tf.shape(ind)[0])
    mask_ends = tf.cast(
        tf.scatter_nd(tf.stack([ind, range_ind], axis=1),
                      tf.ones_like(range_ind),
                      [tf.reduce_max(ind) + 1, tf.shape(ind)[0]]),
        bool)
    # A bit of a trick. With the locations of the ends of the mask (the last periods in
    # each of the contexts) as 1 and the rest as 0, we can scan with exclusive or
    # (starting from all 1). For each context in the batch, this will result in 1s
    # up until the marker (the location of that last period) and 0s afterwards.
    mask = tf.scan(tf.logical_xor, mask_ends, tf.ones_like(range_ind, dtype=bool))
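A hedged mini-demo of the scan/xor trick described in the comment above (one column with an invented end marker at row 2): the running xor stays True until it hits the marker, then flips to False for the rest of the column.

import tensorflow as tf

mask_ends = tf.constant([[False], [False], [True], [False], [False]])
init = tf.ones([1], dtype=tf.bool)
mask = tf.scan(tf.logical_xor, mask_ends, init)
# mask == [[True], [True], [False], [False], [False]]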
def step(self, time, inputs, state, name=None):
    """Perform a decoding step.

    Args:
      time: scalar `int32` tensor.
      inputs: A (structure of) input tensors.
      state: A (structure of) state tensors and TensorArrays.
      name: Name scope for any created operations.

    Returns:
      `(outputs, next_state, next_inputs, finished)`.
    """
    with ops.name_scope(name, "PGDecoderStep", (time, inputs, state)):
        cell_outputs, cell_state = self._cell(inputs, state)
        # the first cell state contains attention, which is context
        attention = cell_state[0].attention
        att_cell_state = cell_state[0].cell_state
        alignments = cell_state[0].alignments

        with tf.variable_scope('calculate_pgen'):
            p_gen = _linear([attention, inputs, att_cell_state], 1, True)
            p_gen = tf.sigmoid(p_gen)

        if self._output_layer is not None:
            cell_outputs = self._output_layer(cell_outputs)

        vocab_dist = tf.nn.softmax(cell_outputs) * p_gen

        # z = tf.reduce_sum(alignments, axis=1)
        # z = tf.reduce_sum(tf.cast(tf.less_equal(alignments, 0), tf.int32))
        alignments = alignments * (1 - p_gen)
        # x = tf.reduce_sum(tf.cast(tf.less_equal((1 - p_gen), 0), tf.int32))
        # y = tf.reduce_sum(tf.cast(tf.less_equal(alignments[3], 0), tf.int32))

        # this is only for debug
        # alignments2 = tf.Print(alignments2, [tf.shape(inputs), x, y, alignments[2][9:12]],
        #                        message="zeros in vocab dist and alignments")

        # since we have OOV words, we need to expand the vocab dist
        vocab_size = tf.shape(vocab_dist)[-1]
        extended_vsize = vocab_size + self.source_oov_words
        batch_size = tf.shape(vocab_dist)[0]
        extra_zeros = tf.zeros((batch_size, self.source_oov_words))
        # batch * extended vocab size
        vocab_dists_extended = tf.concat(axis=-1, values=[vocab_dist, extra_zeros])
        # vocab_dists_extended = tf.Print(vocab_dists_extended,
        #                                 [tf.shape(vocab_dists_extended), self.source_oov_words],
        #                                 message='vocab_dists_extended size')

        batch_nums = tf.range(0, limit=batch_size)  # shape (batch_size)
        batch_nums = tf.expand_dims(batch_nums, 1)  # shape (batch_size, 1)
        attn_len = tf.shape(self.source_extend_tokens)[1]  # number of states we attend over
        batch_nums = tf.tile(batch_nums, [1, attn_len])  # shape (batch_size, attn_len)
        indices = tf.stack((batch_nums, self.source_extend_tokens), axis=2)  # shape (batch_size, enc_t, 2)
        shape = [batch_size, extended_vsize]
        attn_dists_projected = tf.scatter_nd(indices, alignments, shape)

        final_dists = attn_dists_projected + vocab_dists_extended
        # final_dists = tf.Print(final_dists,
        #                        [tf.reduce_sum(tf.cast(tf.less_equal(final_dists[0], 0), tf.int32))],
        #                        message='final dist')

        # note: sample_ids will contain OOV words
        sample_ids = self._helper.sample(time=time, outputs=final_dists, state=cell_state)
        (finished, next_inputs, next_state) = self._helper.next_inputs(
            time=time, outputs=cell_outputs, state=cell_state, sample_ids=sample_ids)

    outputs = tf.contrib.seq2seq.BasicDecoderOutput(final_dists, sample_ids)
    return (outputs, next_state, next_inputs, finished)
def scatter_nd(*args, **kwargs):
    """
    See https://www.tensorflow.org/api_docs/python/tf/scatter_nd .
    """
    return tensorflow.scatter_nd(*args, **kwargs)
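For reference, a minimal call illustrating `tf.scatter_nd` semantics (values are illustrative): updates scattered to the same index are summed.

import tensorflow as tf

out = tf.scatter_nd(indices=[[1], [1], [4]], updates=[10, 20, 30], shape=[6])
# out == [0, 30, 0, 0, 30, 0] -- the two updates aimed at index 1 accumulate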
def __call__(self, y_t_1, s_t_1, encoder_outputs, encoder_feature, enc_padding_mask,
             extra_zeros, enc_batch_extend_vocab, coverage, step):
    if step == 0:
        c_t, _, coverage_next = self.attention(s_t_1, encoder_outputs, encoder_feature,
                                               enc_padding_mask, coverage)
        coverage = coverage_next

    y_t_1_embd = self.embedding(y_t_1)  # batch_size x 1 x embedding_dim
    y_t_1_embd = self.dropout(y_t_1_embd)
    y_t_1_embd = tf.expand_dims(y_t_1_embd, 1)
    gru_out, s_t = self.gru(y_t_1_embd, s_t_1)  # batch_size x 1 x embedding_dim, batch_size x embedding_dim
    gru_out = self.dropout(gru_out)

    s_t_hat = s_t  # batch_size x embedding_dim
    c_t, attn_dist, coverage_next = self.attention(s_t_hat, encoder_outputs, encoder_feature,
                                                   enc_padding_mask, coverage)

    if step > 0:
        coverage = coverage_next

    p_gen = None
    if self.pointer_gen:
        y_t_1_embd = tf.reshape(y_t_1_embd, [-1, self.embedding_dim])
        p_gen_input = tf.concat((c_t, s_t_hat, y_t_1_embd), 1)  # B x (2*2*dec_units + emb_dim)
        p_gen = self.p_gen_linear(p_gen_input)
        p_gen = tf.sigmoid(p_gen)

    output = tf.concat((tf.reshape(gru_out, [-1, self.dec_units]), c_t), 1)  # B x dec_units * 3
    output = self.out1(output)  # B x dec_units
    # output = F.relu(output)
    output = self.out2(output)  # B x vocab_size
    vocab_dist = tf.nn.softmax(output, axis=1)

    if self.pointer_gen:
        vocab_dist_ = p_gen * vocab_dist
        attn_dist_ = (1 - p_gen) * attn_dist
        if extra_zeros is not None:
            vocab_dist_ = tf.concat([vocab_dist_, extra_zeros], 1)
        shape_ = vocab_dist_.shape[1]
        enc_batch_extend_vocab = tf.expand_dims(enc_batch_extend_vocab, 2)
        attn_vocab_dist_ = tf.convert_to_tensor([
            tf.scatter_nd(indices, updates, [shape_])
            for (indices, updates) in zip(enc_batch_extend_vocab, attn_dist_)
        ])
        final_dist = vocab_dist_ + attn_vocab_dist_
    else:
        final_dist = vocab_dist

    return final_dist, s_t, c_t, attn_dist, p_gen, coverage
def dense_from_coo(shape, conns, dtype=tf.float64):
    idxs, weights = conns
    if len(idxs) == 0:
        return tf.zeros(shape, dtype=dtype)
    rows, cols = np.array(idxs).transpose()
    return tf.scatter_nd(tf.stack([rows, cols], -1),
                         tf.convert_to_tensor(weights, preferred_dtype=dtype),
                         shape)
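A quick, hypothetical call (the connection indices and weights are invented for illustration):

import numpy as np
import tensorflow as tf

conns = ([(0, 1), (2, 0)], [0.5, -1.0])
w = dense_from_coo((3, 3), conns)
# w[0, 1] == 0.5 and w[2, 0] == -1.0; every other entry is 0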
def multiple_dot_attention(query, key, value,
                           query_length=None, query_mask=None,
                           memory_length=None, memory_mask=None, name=None):
    """Attention for the given queries, keys and values, where each sample has
    multiple queries (a sequence of queries).

    Args:
        query: a Tensor of shape [batch_size, q_length, query_dim]
        key: a Tensor of shape [batch_size, seq_length, query_dim]
        value: a Tensor of shape [batch_size, seq_length, value_dim]
        query_length: (optional) an integer Tensor of shape [batch_size] which
            specifies the length of the queries for each sample
        query_mask: (optional) a bool Tensor of shape [batch_size, query_length]
            specifying the true elements of the queries, used when query_length
            is not given
        memory_length: (optional) an integer Tensor of shape [batch_size] which
            specifies the length of the memory (keys and values) for each sample
        memory_mask: (optional) a bool Tensor of shape [batch_size, seq_length]
            specifying the true elements of the keys and values, used when
            memory_length is not given
        name: (optional)

    Returns:
        a Tensor of shape [batch_size, q_length, value_dim], the result of the
        attention mechanism
    """
    if name is None:
        name = "multiple_dot_attention"
    with tf.name_scope(name):
        if query_length is not None and query_mask is not None:
            raise AttributeError("Only one of query_length and query_mask can be specified")
        if memory_length is not None and memory_mask is not None:
            raise AttributeError("Only one of memory_length and memory_mask can be specified")

        query_shape = tf.shape(query)
        key_shape = tf.shape(key)
        value_shape = tf.shape(value)
        batch_size = query_shape[0]
        q_length = query_shape[1]
        seq_length = key_shape[1]
        query_dim = query_shape[2]
        value_dim = value_shape[2]

        if query_length is not None:
            query_mask = mask_length(query_length, q_length)
        if query_mask is None:
            query_mask = tf.fill([batch_size, q_length], True)
        if memory_length is not None:
            memory_mask = mask_length(memory_length, seq_length)
        if memory_mask is None:
            memory_mask = tf.fill([batch_size, seq_length], True)

        indices = tf.where(query_mask)
        query = tf.boolean_mask(query, query_mask)
        key = tf.gather(key, indices[:, 0])
        value = tf.gather(value, indices[:, 0])
        memory_mask = tf.gather(memory_mask, indices[:, 0])
        attention = simple_dot_attention(query, key, value, memory_mask=memory_mask)
        return tf.scatter_nd(indices, attention,
                             [batch_size, q_length, value_dim], name=name)
def compute_vertex_normal(vertices: tf.Tensor, indices: tf.Tensor):
    """Compute vertex normals by a weighted average of nearby face normals,
    using Nelson Max's algorithm. See `Weights for Computing Vertex Normals from
    Facet Vectors <https://escholarship.org/content/qt7657d8h3/qt7657d8h3.pdf?t=ptt283>`_.

    Args
    ====
    vertices: tf.Tensor
        3D position of vertices
        float32 tensor with size num_vertices x 3
    indices: tf.Tensor
        vertex indices of triangle faces.
        int32 tensor with size num_triangles x 3

    Returns
    =======
    tf.Tensor
        per-vertex normal, float32 Tensor with size num_vertices x 3
    """

    def dot(v1, v2):
        return tf.math.reduce_sum(v1 * v2, axis=1)

    def squared_length(v):
        return tf.math.reduce_sum(v * v, axis=1)

    def length(v):
        return tf.sqrt(squared_length(v))

    def safe_asin(v):
        # Hack: asin(1)' is infinite, so we want to clamp the contribution
        return tf.asin(tf.clip_by_value(v, 0, 1 - 1e-6))

    # Nelson Max, "Weights for Computing Vertex Normals from Facet Vectors", 1999
    normals = tf.zeros(vertices.shape, dtype=tf.float32)

    # NOTE: Try tf.TensorArray()
    v = [
        tf.gather(vertices, indices[:, 0]),
        tf.gather(vertices, indices[:, 1]),
        tf.gather(vertices, indices[:, 2])
    ]
    for i in range(3):
        v0 = v[i]
        v1 = v[(i + 1) % 3]
        v2 = v[(i + 2) % 3]
        e1 = v1 - v0
        e2 = v2 - v0
        e1_len = length(e1)
        e2_len = length(e2)
        side_a = e1 / tf.reshape(e1_len, [-1, 1])
        side_b = e2 / tf.reshape(e2_len, [-1, 1])
        if i == 0:
            n = tf.linalg.cross(side_a, side_b)
            n = tf.where(
                tf.broadcast_to(tf.reshape(length(n) > 0, (-1, 1)), tf.shape(n)),
                n / tf.reshape(length(n), (-1, 1)),
                tf.zeros(tf.shape(n), dtype=n.dtype))
        angle = tf.where(
            dot(side_a, side_b) < 0,
            math.pi - 2.0 * safe_asin(0.5 * length(side_a + side_b)),
            2.0 * safe_asin(0.5 * length(side_b - side_a)))
        sin_angle = tf.sin(angle)

        e1e2 = e1_len * e2_len
        # contrib is 0 when e1e2 is 0
        contrib = tf.reshape(
            tf.where(e1e2 > 0, sin_angle / e1e2,
                     tf.zeros(tf.shape(e1e2), dtype=e1e2.dtype)),
            (-1, 1))
        contrib = n * tf.broadcast_to(contrib, [tf.shape(contrib)[0], 3])  # In torch, `expand(-1, 3)`
        normals += tf.scatter_nd(tf.reshape(indices[:, i], [-1, 1]), contrib,
                                 shape=tf.shape(normals))

    degenerate_normals = tf.constant((0.0, 0.0, 1.0))
    degenerate_normals = tf.broadcast_to(
        tf.reshape(degenerate_normals, (1, 3)), tf.shape(normals))
    normals = tf.where(
        tf.broadcast_to(tf.reshape(length(normals) > 0, (-1, 1)), tf.shape(normals)),
        normals / tf.reshape(length(normals), (-1, 1)),
        degenerate_normals)
    return normals
def knot_weights(positions, num_knots, degree, cyclical, sparse_mode=False, name=None):
    """Function that converts cardinal B-spline positions to knot weights.

    Note:
      In the following, A1 to An are optional batch dimensions.

    Args:
      positions: A tensor with shape `[A1, .. An]`. Positions must be between
        `[0, C - D)` for non-cyclical and `[0, C)` for cyclical splines, where `C`
        is the number of knots and `D` is the spline degree.
      num_knots: A strictly positive `int` describing the number of knots in the
        spline.
      degree: An `int` describing the degree of the spline, which must be smaller
        than `num_knots`.
      cyclical: A `bool` describing whether the spline is cyclical.
      sparse_mode: A `bool` describing whether to return a result only for the
        knots with nonzero weights. If set to True, the function returns the
        weights of only the `degree` + 1 knots that are non-zero, as well as the
        indices of the knots.
      name: A name for this op. Defaults to "bspline_knot_weights".

    Returns:
      A tensor with dense weights for each control point, with the shape
      `[A1, ... An, C]` if `sparse_mode` is False.
      Otherwise, returns a tensor of shape `[A1, ... An, D + 1]` that contains the
      non-zero weights, and a tensor with the indices of the knots, with the type
      tf.int32.

    Raises:
      ValueError: If degree is greater than 4 or num_knots - 1, or less than 0.
      InvalidArgumentError: If positions are not in the right range.
    """
    with tf.compat.v1.name_scope(name, "bspline_knot_weights", [positions]):
        positions = tf.convert_to_tensor(value=positions)

        if degree > 4 or degree < 0:
            raise ValueError("Degree should be between 0 and 4.")
        if degree > num_knots - 1:
            raise ValueError("Degree cannot be >= number of knots.")

        if cyclical:
            positions = asserts.assert_all_in_range(positions, 0.0, float(num_knots))
        else:
            positions = asserts.assert_all_in_range(positions, 0.0,
                                                    float(num_knots - degree))

        all_basis_functions = {
            # Maps valid degrees to functions.
            Degree.CONSTANT: _constant,
            Degree.LINEAR: _linear,
            Degree.QUADRATIC: _quadratic,
            Degree.CUBIC: _cubic,
            Degree.QUARTIC: _quartic
        }
        basis_functions = all_basis_functions[degree]

        if not cyclical and num_knots - degree == 1:
            # In this case all weights are non-zero and we can just return them.
            if not sparse_mode:
                return basis_functions(positions)
            else:
                shift = tf.zeros_like(positions, dtype=tf.int32)
                return basis_functions(positions), shift

        # shape_batch = positions.shape.as_list()
        shape_batch = tf.shape(input=positions)
        positions = tf.reshape(positions, shape=(-1,))

        # Calculate the nonzero weights from the decimal parts of positions.
        shift = tf.floor(positions)
        sparse_weights = basis_functions(positions - shift)
        shift = tf.cast(shift, tf.int32)

        if sparse_mode:
            # Returns just the weights and the shift amounts, so that tf.gather_nd on
            # the knots can be used to sparsely activate knots if needed.
            shape_weights = tf.concat(
                (shape_batch, tf.constant((degree + 1,), dtype=tf.int32)), axis=0)
            sparse_weights = tf.reshape(sparse_weights, shape=shape_weights)
            shift = tf.reshape(shift, shape=shape_batch)
            return sparse_weights, shift

        num_positions = tf.size(input=positions)
        ind_row, ind_col = tf.meshgrid(
            tf.range(num_positions, dtype=tf.int32),
            tf.range(degree + 1, dtype=tf.int32),
            indexing="ij")

        tiled_shifts = tf.reshape(
            tf.tile(tf.expand_dims(shift, axis=-1), multiples=(1, degree + 1)),
            shape=(-1,))
        ind_col = tf.reshape(ind_col, shape=(-1,)) + tiled_shifts
        if cyclical:
            ind_col = tf.math.mod(ind_col, num_knots)
        indices = tf.stack((tf.reshape(ind_row, shape=(-1,)), ind_col), axis=-1)
        shape_indices = tf.concat(
            (tf.reshape(num_positions, shape=(1,)),
             tf.constant((degree + 1, 2), dtype=tf.int32)), axis=0)
        indices = tf.reshape(indices, shape=shape_indices)
        shape_scatter = tf.concat(
            (tf.reshape(num_positions, shape=(1,)),
             tf.constant((num_knots,), dtype=tf.int32)), axis=0)
        weights = tf.scatter_nd(indices, sparse_weights, shape_scatter)
        shape_weights = tf.concat(
            (shape_batch, tf.constant((num_knots,), dtype=tf.int32)), axis=0)
        return tf.reshape(weights, shape=shape_weights)
def _body(x_adv, epoch, pixel_mask):
    ybar = model(x_adv)

    y_target = tf.slice(ybar, [0, yi], [-1, 1])
    dy_dx, = tf.gradients(ybar, x_adv)
    dt_dx, = tf.gradients(y_target, x_adv)
    do_dx = dy_dx - dt_dx

    ind = tf.where(pixel_mask)
    n = tf.shape(ind)
    n = n[0]
    ind2 = tf.range(n)

    batch_size = tf.constant(100)

    def _maxpair_batch_cond(i0, j0, v0, start):
        return tf.less(start, n)

    def _maxpair_batch_body(i0, j0, v0, start):
        count = tf.reduce_min([batch_size, n - start])
        ind3 = tf.slice(ind2, [start], [count])

        # Selection C(n, 2), e.g., if n=4, a=[0 0 1 0 1 2], b=[1 2 2 3 3 3];
        # the corresponding elements in the two arrays make a pair, i.e., the
        # pair indices are stored separately. A special case is when there is
        # only one pixel left.
        a, b = tf.meshgrid(ind3, ind3)
        c = tf.cond(tf.greater(count, 1),
                    lambda: tf.less(a, b),
                    lambda: tf.less_equal(a, b))
        c = tf.where(c)
        a, b = tf.gather_nd(a, c), tf.gather_nd(b, c)

        # ii, jj contain indices to pixels
        ii, jj = tf.gather(ind, a), tf.gather(ind, b)

        ti, oi = tf.gather_nd(dt_dx, ii), tf.gather_nd(do_dx, ii)
        tj, oj = tf.gather_nd(dt_dx, jj), tf.gather_nd(do_dx, jj)

        # the gradient of each pair is the sum of individuals
        t, o = ti + tj, oi + oj

        # increase target probability while decreasing others
        c = tf.logical_and(t >= 0, o <= 0)
        not_empty = tf.reduce_any(c)

        # ensure that c is not empty
        c = tf.cond(not_empty,
                    lambda: c,
                    lambda: tf.ones_like(c, dtype=bool))
        c = tf.where(c)

        t, o = tf.gather_nd(t, c), tf.gather_nd(o, c)
        ii, jj = tf.gather_nd(ii, c), tf.gather_nd(jj, c)

        # saliency score
        score = tf.cond(not_empty,
                        lambda: tf.multiply(t, tf.abs(o)),
                        lambda: t - o)

        # find the max pair in current batch
        p = tf.argmax(score, axis=0)
        v = tf.reduce_max(score, axis=0)
        i, j = tf.gather(ii, p), tf.gather(jj, p)
        i, j = tf.to_int32(i), tf.to_int32(j)

        i1, j1, v1 = tf.cond(tf.greater(v, v0),
                             lambda: (i, j, v),
                             lambda: (i0, j0, v0))
        return i1, j1, v1, start + batch_size

    i = tf.to_int32(tf.gather(ind, 0))
    j = tf.to_int32(tf.gather(ind, 1))

    # Find the max saliency pair in batches. Naive iteration through the pairs
    # takes O(n^2) time. A vectorized implementation may speed up the running
    # time significantly, at the expense of O(n^2) space. So instead we find
    # the max pair with a batched max; within each batch we use the vectorized
    # implementation.
    i, j, _, _ = tf.while_loop(_maxpair_batch_cond, _maxpair_batch_body,
                               (i, j, -1., 0), back_prop=False)

    dx = tf.scatter_nd([i], [eps], tf.shape(x_adv)) + \
         tf.scatter_nd([j], [eps], tf.shape(x_adv))

    x_adv = tf.stop_gradient(x_adv + dx)
    x_adv = tf.clip_by_value(x_adv, clip_min, clip_max)
    epoch += 1
    pixel_mask = tf.cond(tf.greater(eps, 0),
                         lambda: tf.less(x_adv, clip_max),
                         lambda: tf.greater(x_adv, clip_min))
    return x_adv, epoch, pixel_mask
def radialSpectrumMC(points, fourierSetup):
    with tf.name_scope('radialSpectrumMC'):

        #-------------------------------------------
        def sampleSpectrum(input):
            freqSamples = input[0]
            points = input[1]
            pointCount = tf.cast(tf.shape(points)[0], tf.float32)
            dotProduct = tf.tensordot(freqSamples, points, [[2], [1]])
            twoPi = 2.0 * math.pi
            real = tf.cos(twoPi * dotProduct)
            imag = tf.sin(twoPi * dotProduct)
            sumReal = tf.reduce_sum(real, -1)
            sumImag = tf.reduce_sum(imag, -1)
            power = (sumReal * sumReal + sumImag * sumImag) / pointCount
            return power

        #-------------------------------------------
        def ceilAwayFromZero(input):
            return tf.sign(input) * tf.ceil(tf.abs(input))

        #-------------------------------------------
        batchSize, _, dimCount = points.shape
        freqRes = fourierSetup.resolution
        freqStep = fourierSetup.freqStep
        mcSampleCount = fourierSetup.mcSamplesPerShell

        # generate normal samples
        normDst = tf.distributions.Normal(
            loc=np.full((dimCount,), 0.),
            scale=np.full((dimCount,), 1.))
        mcSamples = tf.cast(normDst.sample([batchSize, freqRes, mcSampleCount]), tf.float32)

        # project samples to unit hypersphere
        # https://dl.acm.org/citation.cfm?id=377946
        shellSamples = tf.nn.l2_normalize(mcSamples, axis=-1, epsilon=SQRT_EPS)

        # scale shells by frequencies
        frequencies = tf.range(
            start=0,
            limit=freqRes * freqStep,
            delta=freqStep,
            dtype=tf.float32)
        shellSamples *= tf.reshape(frequencies, [1, freqRes, 1, 1])
        # shellSamples = tf.round(shellSamples)
        shellSamples = ceilAwayFromZero(shellSamples)

        # power spectrum for each frequency sample
        spectrum = tf.map_fn(
            lambda b: sampleSpectrum(b),
            (shellSamples, points),
            dtype=tf.float32)

        # radial and batch average
        avg = tf.reduce_mean(spectrum, [0, -1])

        if fourierSetup.cancelDC:
            dcComp = avg[0]
            avg -= tf.scatter_nd([[0]], [dcComp], avg.shape)

        return avg
def vec_to_tri_vector(vector, indices, N): # `indices` (one [row, col] pair per element of `vector`) and the matrix size `N` were undefined free variables in the original snippet; they are taken as arguments here. return tf.scatter_nd(indices=indices, shape=[N, N], updates=vector)
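Hypothetical usage of `vec_to_tri_vector`, building the lower-triangular `indices` with numpy (the index layout is an assumption, since the original snippet left `indices` and `N` undefined):

import numpy as np
import tensorflow as tf

N = 3
rows, cols = np.tril_indices(N)                        # lower-triangle coordinates
indices = tf.constant(np.stack([rows, cols], axis=1))  # one [row, col] per entry
vector = tf.constant([1., 2., 3., 4., 5., 6.])
tri = vec_to_tri_vector(vector, indices, N)
# tri == [[1, 0, 0], [2, 3, 0], [4, 5, 6]]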
def build_model(): with tf.name_scope('placeholders'): real_data_int = tf.placeholder( tf.uint8, [None, picture_size]) # uint8 with int values in [0, 255] x_true = tf.cast(real_data_int, tf.float32) / 255. # float with values in [0,1] z = tf.placeholder(tf.float32, [None, input_dim]) if use_JL: JL = tf.placeholder(tf.float32, [picture_size, JL_dim]) P_non_normalized = tf.placeholder(tf.float32, [JL_dim, n_projections]) P_non_normalized_SWD = tf.placeholder( tf.float32, [picture_size, n_projections]) else: JL = None P_non_normalized = tf.placeholder( tf.float32, [picture_size, n_projections]) P_non_normalized_SWD = None x_generated = generator( z, n_features_first=n_features_first, n_features_reduction_factor=n_features_reduction_factor, min_features=min_features, BN=BN, power=power, init_method=init_method) # define loss (big part taken from SWG) with tf.name_scope('loss'): # apply the Johnson-Lindenstrauss map, if wanted, to the flattened arrays if use_JL: JL_true = tf.matmul(x_true, JL) JL_gen = tf.matmul(x_generated, JL) else: JL_true = x_true JL_gen = x_generated # next project the samples (images). After being transposed, we have tensors # of the format: [[projected_image1_proj1, projected_image2_proj1, ...], # [projected_image1_proj2, projected_image2_proj2, ...], # ...] # Each row has the projections along one direction. This makes it easier for the sorting that follows. # first normalize the random normal vectors to lie in the sphere P = tf.nn.l2_normalize(P_non_normalized, axis=0) projected_true = tf.transpose(tf.matmul(JL_true, P)) projected_fake = tf.transpose(tf.matmul(JL_gen, P)) sorted_true, true_indices = tf.nn.top_k(input=projected_true, k=batch_size) sorted_fake, fake_indices = tf.nn.top_k(input=projected_fake, k=batch_size) # For faster gradient computation, we do not use sorted_fake to compute # loss. Instead we re-order the sorted_true so that the samples from the # true distribution go to the correct sample from the fake distribution. # It is less expensive (memory-wise) to rearrange arrays in TF. # Flatten the sorted_true from dim [n_projections, batch_size]. flat_true = tf.reshape(sorted_true, [-1]) # Modify the indices to reflect this transition to an array. 
# new index = row + index rows = np.asarray([ batch_size * np.floor(i * 1.0 / batch_size) for i in range(n_projections * batch_size) ]) rows = rows.astype(np.int32) flat_idx = tf.reshape(fake_indices, [-1, 1]) + np.reshape( rows, [-1, 1]) # The scatter operation takes care of reshaping to the rearranged matrix shape = tf.constant([batch_size * n_projections]) rearranged_true = tf.reshape( tf.scatter_nd(flat_idx, flat_true, shape), [n_projections, batch_size]) generator_loss = tf.reduce_mean( tf.square(projected_fake - rearranged_true)) # get for JLSWGN the sliced Wasserstein distance (SWD) (since SWD and JLSWD are not comparable) if use_JL: P_SWD = tf.nn.l2_normalize(P_non_normalized_SWD, axis=0) projected_true_SWD = tf.transpose(tf.matmul(x_true, P_SWD)) projected_fake_SWD = tf.transpose(tf.matmul( x_generated, P_SWD)) sorted_true_SWD, true_indices_SWD = tf.nn.top_k( input=projected_true_SWD, k=batch_size) sorted_fake_SWD, fake_indices_SWD = tf.nn.top_k( input=projected_fake_SWD, k=batch_size) flat_true_SWD = tf.reshape(sorted_true_SWD, [-1]) flat_idx_SWD = tf.reshape(fake_indices_SWD, [-1, 1]) + np.reshape(rows, [-1, 1]) rearranged_true_SWD = tf.reshape( tf.scatter_nd(flat_idx_SWD, flat_true_SWD, shape), [n_projections, batch_size]) SWD = tf.reduce_mean( tf.square(projected_fake_SWD - rearranged_true_SWD)) else: SWD = generator_loss with tf.name_scope('optimizer'): generator_vars = tf.get_collection( tf.GraphKeys.TRAINABLE_VARIABLES, scope='generator') g_optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate, beta1=0.5) g_train = g_optimizer.minimize(generator_loss, var_list=generator_vars) # initialize variables using init_method session.run(tf.global_variables_initializer()) return real_data_int, z, x_generated, JL, P_non_normalized, P_non_normalized_SWD, SWD, g_train
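The rearrangement above relies on `tf.scatter_nd` inverting the permutation produced by `tf.nn.top_k`: scattering the sorted true values to the positions the sorted fake values came from aligns the two samples. A toy, single-projection sketch (numbers are made up):

import tensorflow as tf

flat_true = tf.constant([9., 7., 5.])        # true values, sorted descending
fake_indices = tf.constant([[2], [0], [1]])  # origin of each sorted fake value
rearranged = tf.scatter_nd(fake_indices, flat_true, tf.constant([3]))
# rearranged == [7., 5., 9.]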
def seq2seq(train_or_infer, x_id, y_id, keep_prob, batch_size, x_id_extended, y_id_extended, vocab_size_extend, word_embd_dim, dim_rnn, use_same_word_embd=False, encoder_word_embd_pretrain=None, encoder_vocab_size=None, decoder_word_embd_pretrain=None, decoder_vocab_size=None, target_seq_len_max=None): with tf.variable_scope('encoder') as scope_encoder: # encoder[0] [batch_size, source_seq_max_len]: source sequence, not yet embedded, of type np.array encoder = [x_id] # batch_size = encoder[0].shape[0].value encoder_seq_max_len = encoder[0].shape[1].value encoder_seq_len = tf.cast(tf.reduce_sum(tf.sign(encoder[0]), axis=1), tf.int32) # encoder[1] [batch_size, source_seq_max_len, word_embd_dim]: embedding of the source sequence encoder_word_embd, encoder_vocab_size, word_embd_dim \ = creat_word_embd(encoder_word_embd_pretrain, encoder_vocab_size, word_embd_dim, name='encoder_word_embd_matrix') encoder_w2v = tf.nn.embedding_lookup(encoder_word_embd, encoder[0]) encoder.append(encoder_w2v) # encoder[2] ([batch_size,source_seq_max_len,dim_rnn*2], state_shape): build the encoder model and run it with dynamic_rnn encoder_cell_fw_raw = tf.nn.rnn_cell.BasicLSTMCell(num_units=dim_rnn, state_is_tuple=True) encoder_cell_fw = tf.nn.rnn_cell.DropoutWrapper( cell=encoder_cell_fw_raw, output_keep_prob=keep_prob) encoder_cell_bw_raw = tf.nn.rnn_cell.BasicLSTMCell(num_units=dim_rnn, state_is_tuple=True) encoder_cell_bw = tf.nn.rnn_cell.DropoutWrapper( cell=encoder_cell_bw_raw, output_keep_prob=keep_prob) encoder_outputs, state = tf.nn.bidirectional_dynamic_rnn( cell_fw=encoder_cell_fw, cell_bw=encoder_cell_bw, inputs=encoder[1], sequence_length=encoder_seq_len, dtype=tf.float32) memory = tf.concat(encoder_outputs, axis=2) init_state = init_state_reconstruct(encoder_state=state, encoder_state_type='bilstm', decoder_state_type='lstm', fill_zero=False) encoder.append((memory, init_state)) with tf.variable_scope('decoder') as scope_decoder: # decoder[0] [batch_size,target_seq_len_max]: target sequence, not yet embedded, of type np.array if 'train' == train_or_infer: decoder = [y_id] # target_seq carries both '<SOS>' and '<EOS>', so subtract 1 when computing its length decoder_seq_len = tf.cast( tf.reduce_sum(tf.sign(decoder[0]) - 1, axis=1), tf.int32) elif 'infer' == train_or_infer: # in inference mode, initialize decoder[0] as a single-step <sos>, whose code is 1 decoder = [tf.ones(shape=[batch_size, 1], dtype=tf.int32)] else: print('parameter train_or_infer has not been defined !!!') decoder = [tf.ones(shape=[batch_size, 1], dtype=tf.int32)] # decoder[1] [batch_size, target_seq_max_len, word_embd_dim]: embedding of the target sequence if use_same_word_embd is True: decoder_word_embd = encoder_word_embd decoder_vocab_size = encoder_vocab_size else: decoder_word_embd, decoder_vocab_size, word_embd_dim \ = creat_word_embd(decoder_word_embd_pretrain, decoder_vocab_size, word_embd_dim, name='decoder_word_embd_matrix') decoder_w2v = tf.nn.embedding_lookup(decoder_word_embd, decoder[0]) decoder.append(decoder_w2v) # decoder[2] build the decoder model # outputs: list of [batch_size, dim_rnn] by length target_seq_len_max # aligns: list of [batch_size, encoder_step_len] by length target_seq_len_max # p_gens: list of [batch_size] by length target_seq_len_max with tf.variable_scope('dynamic_decoder') as scope_dynamic_decoder: decoder_cell_raw = tf.nn.rnn_cell.BasicLSTMCell( num_units=dim_rnn, state_is_tuple=True) decoder_cell = tf.nn.rnn_cell.DropoutWrapper( cell=decoder_cell_raw, output_keep_prob=keep_prob) outputs, aligns, p_gens, cell_state = dynamic_decoder( cell=decoder_cell, memory=memory, memory_seq_len=encoder_seq_len, init_state=init_state, train_or_infer=train_or_infer,
decoder_seq_len_max=target_seq_len_max, target_seq_embd=decoder[1], decoder_word_embd=decoder_word_embd) decoder.append((outputs, aligns, p_gens)) # decoder[3] compute the outputs of the Generator_Network and the Pointer_Network # vocab_dist_extendeds: list [batch_size, decoder_vocab_size+vocab_size_extend] by length target_seq_len_max # attention_dist_extendeds: list [batch_size, decoder_vocab_size+vocab_size_extend] by len target_seq_len_max # p_gens: list of [batch_size] by length target_seq_len_max with tf.variable_scope('Generator_Network') as scope_Generator_Network: weight = tf.get_variable( name='weight', trainable=True, initializer=tf.truncated_normal( [decoder_cell.output_size, decoder_vocab_size], stddev=math.sqrt( 6 / (decoder_cell.output_size + decoder_vocab_size)), dtype=tf.float32)) bias = tf.get_variable(name='bias', trainable=True, initializer=tf.truncated_normal( [decoder_vocab_size], stddev=0.1, dtype=tf.float32)) vocab_scores = [] for step_num, output in enumerate(outputs): if step_num > 0: tf.get_variable_scope().reuse_variables() vocab_scores.append(tf.nn.xw_plus_b( output, weight, bias)) # apply the linear layer # vocab_distributions: list [batch_size, decoder_vocab_size] length of decoder_step_len vocab_distributions = [ tf.nn.softmax(score) for score in vocab_scores ] # vocab_dist_extendeds: list [batch_size, decoder_vocab_size+vocab_size_extend] by length decoder_step_len vocab_dist_extendeds = [ tf.pad(vocab_dist, paddings=[[0, 0], [0, vocab_size_extend]]) for vocab_dist in vocab_distributions ] with tf.variable_scope('Pointer_Network') as scope_Pointer_Network: index_batch_num = tf.range(batch_size) index_batch_num = tf.expand_dims(index_batch_num, 1) index_batch_num = tf.tile(index_batch_num, [1, encoder_seq_max_len]) index = tf.stack((index_batch_num, x_id_extended), axis=2) # attention_dist_extendeds: list [batch_size, decoder_vocab_size+vocab_size_extend] by len decoder_step_len attention_dist_extendeds = [ tf.scatter_nd( index, align, [batch_size, encoder_vocab_size + vocab_size_extend]) for align in aligns ] if use_same_word_embd is not True: # todo this actually removes the probabilities of in-vocabulary source words and keeps only the OOV word probabilities; it needs to be corrected later. attention_dist_extendeds = [ tf.concat([ tf.zeros(shape=[batch_size, decoder_vocab_size], dtype=tf.float32), att_dist[:, encoder_vocab_size:] ], axis=1) for att_dist in attention_dist_extendeds ] decoder.append( (vocab_dist_extendeds, attention_dist_extendeds, aligns)) # decoder[4] compute the final output of the model # final_distributions: list of [batch_size, decoder_vocab_size+vocab_size_extend] by len target_seq_len_max with tf.variable_scope('Switching_Network') as scope_Switching_Network: # todo do not use copynet if False: final_distributions = vocab_dist_extendeds else: final_distributions = [ vocab_dist * p_gen + attn_dist * (1 - p_gen) for (p_gen, vocab_dist, attn_dist) in zip( p_gens, vocab_dist_extendeds, attention_dist_extendeds) ] decoder.append(final_distributions) # todo introduce a non-linearity if 'train' == train_or_infer: return encoder + decoder else: return tf.concat([ tf.expand_dims(step_output, axis=1) for step_output in decoder[-1] ], axis=1)
def likelihood_ratio_filter(node_pairs, modified_adjacency, original_adjacency, d_min, threshold=0.004): """ Filter the input node pairs based on the likelihood ratio test proposed by Zügner et al. 2018, see https://dl.acm.org/citation.cfm?id=3220078. In essence, for each node pair return 1 if adding/removing the edge between the two nodes does not violate the unnoticeability constraint, and return 0 otherwise. Assumes unweighted and undirected graphs. Parameters ---------- node_pairs: tf.Tensor, shape (e, 2) dtype int The e node pairs to consider, where each node pair consists of the two indices of the nodes. modified_adjacency: tf.Tensor shape (N,N) dtype int The input (modified) adjacency matrix. Assumed to be unweighted and symmetric. original_adjacency: tf.Tensor shape (N,N) dtype int The input (original) adjacency matrix. Assumed to be unweighted and symmetric. d_min: int The minimum degree considered in the Powerlaw distribution. threshold: float, default 0.004 Cutoff value for the unnoticeability constraint. Smaller means stricter constraint. 0.004 corresponds to a p-value of 0.95 in the Chi-square distribution with one degree of freedom. Returns ------- allowed_mask: tf.Tensor, shape (e,), dtype bool For each node pair p return True if adding/removing the edge p does not violate the cutoff value, False otherwise. current_ratio: tf.Tensor, shape (), dtype float The current value of the log likelihood ratio. """ N = int(modified_adjacency.shape[0]) original_degree_sequence = tf.cast( tf.reduce_sum(original_adjacency, axis=1), tf.float32) current_degree_sequence = tf.cast( tf.reduce_sum(modified_adjacency, axis=1), tf.float32) # Concatenate the degree sequences concat_degree_sequence = tf.concat( (current_degree_sequence[None, :], original_degree_sequence[None, :]), axis=1) # Compute the log likelihood values of the original, modified, and combined degree sequences. ll_orig, alpha_orig, n_orig, sum_log_degrees_original = degree_sequence_log_likelihood( original_degree_sequence, d_min) ll_current, alpha_current, n_current, sum_log_degrees_current = degree_sequence_log_likelihood( current_degree_sequence, d_min) ll_comb, alpha_comb, n_comb, sum_log_degrees_combined = degree_sequence_log_likelihood( concat_degree_sequence, d_min) # Compute the log likelihood ratio current_ratio = -2 * ll_comb + 2 * (ll_orig + ll_current) # Compute new log likelihood values that would arise if we add/remove the edges corresponding to each node pair. new_lls, new_alphas, new_ns, new_sum_log_degrees = updated_log_likelihood_for_edge_changes( node_pairs, tf.cast(modified_adjacency, tf.float32), d_min) # Combination of the original degree distribution with the distributions corresponding to each node pair. n_combined = n_orig + new_ns new_sum_log_degrees_combined = sum_log_degrees_original + new_sum_log_degrees alpha_combined = compute_alpha(n_combined, new_sum_log_degrees_combined, d_min) new_ll_combined = compute_log_likelihood(n_combined, alpha_combined, new_sum_log_degrees_combined, d_min) new_ratios = -2 * new_ll_combined + 2 * (new_lls + ll_orig) # Allowed edges are only those for which the resulting likelihood ratio measure is smaller than the threshold allowed_edges = new_ratios < threshold filtered_edges = tf.boolean_mask(node_pairs, allowed_edges) # Get the flattened indices for the allowed edges [e,2] -> [e,], similar to np.ravel_multi_index flat_ixs = ravel_multiple_indices(tf.cast(filtered_edges, tf.int32), modified_adjacency.shape) # Also for the reverse direction (we assume undirected graphs).
flat_ixs_reverse = ravel_multiple_indices( tf.reverse(tf.cast(filtered_edges, tf.int32), [1]), modified_adjacency.shape) # Construct a [N * N] array with ones at the admissible node pair locations and 0 everywhere else. indices_1 = tf.scatter_nd(flat_ixs[:, None], tf.ones_like(flat_ixs, dtype=tf.float32), shape=[N * N]) indices_2 = tf.scatter_nd(flat_ixs_reverse[:, None], tf.ones_like(flat_ixs_reverse, dtype=tf.float32), shape=[N * N]) # Add both directions allowed_mask = tf.clip_by_value(indices_1 + indices_2, 0, 1) return allowed_mask, current_ratio
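A small sketch (assumed toy sizes) of the masking pattern above: one scatter per edge direction, with `tf.clip_by_value` collapsing any double counting to a {0, 1} mask:

import tensorflow as tf

N = 3
flat_ixs = tf.constant([1])        # edge (0, 1) raveled into a flat N*N index
flat_ixs_rev = tf.constant([3])    # the reverse edge (1, 0)
ones = tf.ones_like(flat_ixs, dtype=tf.float32)
allowed_mask = tf.clip_by_value(
    tf.scatter_nd(flat_ixs[:, None], ones, shape=[N * N]) +
    tf.scatter_nd(flat_ixs_rev[:, None], ones, shape=[N * N]), 0, 1)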
def __init__(self, params, prior_embeddings=None, initializer_nvdm=None, topic_coherence_embeddings=None): self.vocab_size = params.TM_vocab_length self.n_hidden = params.hidden_size_TM self.n_topic = n_topic self.n_sample = n_sample self.non_linearity = non_linearity self.learning_rate = params.learning_rate self.batch_size = params.batch_size self.x = tf.placeholder(tf.float32, [None, self.vocab_size], name='x') self.mask = tf.placeholder(tf.float32, [None], name='mask') # mask paddings if params.use_sent_topic_rep: self.x_sent = tf.placeholder(tf.float32, [None, None, self.vocab_size], name='x_sent') if params.use_topic_embedding: self.x_doc_mask = tf.placeholder(tf.float32, [None, self.vocab_size], name='x_doc_mask') #self.input_batch_size = tf.placeholder(tf.int32, (), name='input_batch_size') self.input_batch_size = tf.shape(self.x)[0] if params.use_sent_topic_rep: self.input_batch_size_sent = tf.shape(self.x_sent)[0] self.input_batch_len_sent = tf.shape(self.x_sent)[1] self.batch_size_sent = self.input_batch_size_sent * self.input_batch_len_sent # encoder with tf.variable_scope('TM_encoder', reuse=tf.AUTO_REUSE): self.enc_vec = utils.mlp(self.x, [self.n_hidden], self.non_linearity) #self.enc_vec = utils.mlp(self.x, [self.n_hidden, self.n_hidden], self.non_linearity) self.mean = utils.nvdm_linear( self.enc_vec, self.n_topic, scope='mean', #matrix_initializer=initializer_nvdm[1][0], matrix_initializer=None, #bias_initializer=initializer_nvdm[1][1]) bias_initializer=None) self.logsigm = utils.nvdm_linear( self.enc_vec, self.n_topic, bias_start_zero=True, matrix_start_zero=True, scope='logsigm', #matrix_initializer=initializer_nvdm[2][0], matrix_initializer=None, #bias_initializer=initializer_nvdm[2][1]) bias_initializer=None) self.kld = -0.5 * tf.reduce_sum( 1 - tf.square(self.mean) + 2 * self.logsigm - tf.exp(2 * self.logsigm), 1) #self.kld = self.mask*self.kld # mask paddings self.kld = tf.multiply(self.mask, self.kld, name='kld') # mask paddings if params.use_sent_topic_rep: self.x_sent_reshape = tf.reshape(self.x_sent, [-1, self.vocab_size]) self.enc_vec_sent = utils.mlp(self.x_sent_reshape, [self.n_hidden], self.non_linearity) self.mean_sent = utils.nvdm_linear(self.enc_vec_sent, self.n_topic, scope='mean') self.logsigm_sent = utils.nvdm_linear(self.enc_vec_sent, self.n_topic, bias_start_zero=True, matrix_start_zero=True, scope='logsigm') if params.prior_emb_for_topics or params.topic_coherence_reg: W_prior = tf.get_variable('embeddings_TM_prior', dtype=tf.float32, initializer=prior_embeddings, trainable=False) with tf.variable_scope('TM_decoder', reuse=tf.AUTO_REUSE): if self.n_sample == 1: eps = tf.random_normal((self.input_batch_size, self.n_topic), mean=0.0, stddev=1.0, seed=seed) #doc_vec = tf.mul(tf.exp(self.logsigm), eps) + self.mean self.doc_vec = tf.add(tf.multiply(tf.exp(self.logsigm), eps), self.mean, name='doc_hidden') if GSM: self.doc_vec = tf.nn.softmax(self.doc_vec, axis=1) self.last_h = self.doc_vec logits_projected, self.decoding_matrix = utils.nvdm_linear( self.doc_vec, self.vocab_size, scope='projection', get_matrix=True, #matrix_initializer=initializer_nvdm[3][0], matrix_initializer=None, #bias_initializer=initializer_nvdm[3][1]) bias_initializer=None) logits = tf.nn.log_softmax(logits_projected) self.recons_loss = -tf.reduce_sum(tf.multiply(logits, self.x), 1) else: #eps = tf.random_normal((self.n_sample*self.batch_size, self.n_topic), mean=0.0, stddev=1.0) eps = tf.random_normal( (self.n_sample * self.input_batch_size, self.n_topic), mean=0.0, stddev=1.0, 
seed=seed) eps_list = tf.split(eps, self.n_sample, 0) recons_loss_list = [] doc_vec_list = [] for i in range(self.n_sample): if i > 0: tf.get_variable_scope().reuse_variables() curr_eps = eps_list[i] doc_vec = tf.add( tf.multiply(tf.exp(self.logsigm), curr_eps), self.mean) if GSM: doc_vec = tf.nn.softmax(doc_vec, axis=1) doc_vec_list.append(doc_vec) logits, self.decoding_matrix = utils.nvdm_linear( doc_vec, self.vocab_size, scope='projection', get_matrix=True, matrix_initializer=None, bias_initializer=None) logits = tf.nn.log_softmax(logits) recons_loss_list.append( -tf.reduce_sum(tf.multiply(logits, self.x), 1)) self.recons_loss = tf.add_n(recons_loss_list) / self.n_sample self.doc_vec = tf.add_n(doc_vec_list) / self.n_sample self.last_h = self.doc_vec # TOPIC EMBEDDING CODE if params.use_topic_embedding: topics_masked = tf.multiply(tf.expand_dims(self.x_doc_mask, axis=1), tf.expand_dims( self.decoding_matrix, axis=0), name='topics_masked') self.top_k = tf.nn.top_k(topics_masked, k=params.use_k_topic_words, sorted=False) if params.prior_emb_for_topics: self.top_k_embeddings = tf.nn.embedding_lookup( W_prior, self.top_k.indices) if concat_topic_emb_and_prop: self.topic_emb_size = prior_embeddings.shape[ 1] + self.n_topic else: self.topic_emb_size = prior_embeddings.shape[1] else: self.top_k_embeddings = tf.nn.embedding_lookup( tf.transpose(self.decoding_matrix), self.top_k.indices) if concat_topic_emb_and_prop: self.topic_emb_size = self.n_topic * 2 else: self.topic_emb_size = self.n_topic self.topic_embeddings = tf.reduce_mean(self.top_k_embeddings, axis=2, name='topic_embeddings') if params.use_k_topics > 0: # Masking document topic proportion vector top_k_h_values, top_k_h_indices = tf.nn.top_k( self.last_h, k=params.use_k_topics, sorted=False, name='top_k_h') row_numbers = tf.tile(tf.expand_dims( tf.range(0, self.input_batch_size), 1), [1, params.use_k_topics], name='row_numbers') full_indices = tf.concat([ tf.expand_dims(row_numbers, -1), tf.expand_dims(top_k_h_indices, -1) ], axis=2) full_indices = tf.reshape(full_indices, [-1, 2], name='full_indices') last_h_softmax = tf.scatter_nd( full_indices, tf.reshape(tf.nn.softmax(top_k_h_values, axis=1), [-1]), #tf.ones([self.input_batch_size * params.use_k_topics], dtype=tf.float32), [self.input_batch_size, self.n_topic], name='last_h_softmax') else: last_h_softmax = tf.nn.softmax(self.last_h, axis=1, name='last_h_softmax') self.last_h_topic_emb = tf.squeeze(tf.matmul( tf.expand_dims(last_h_softmax, axis=1), self.topic_embeddings), axis=1, name='last_h_topic_emb') if concat_topic_emb_and_prop: self.last_h_topic_emb = tf.concat( [self.last_h_topic_emb, self.last_h], axis=1, name='last_h_topic_emb_concat') # Code segment for Sentence-level topical discourse if params.use_sent_topic_rep: if self.n_sample == 1: eps_sent = tf.random_normal( (self.batch_size_sent, self.n_topic), mean=0.0, stddev=1.0, seed=seed) self.last_h_sent = tf.add(tf.multiply( tf.exp(self.logsigm_sent), eps_sent), self.mean_sent, name='sent_hidden') else: eps_sent = tf.random_normal( (self.n_sample * self.batch_size_sent, self.n_topic), mean=0.0, stddev=1.0, seed=seed) eps_sent_list = tf.split(eps_sent, self.n_sample, 0) recons_loss_list = [] sent_vec_list = [] for i in range(self.n_sample): if i > 0: tf.get_variable_scope().reuse_variables() curr_eps = eps_sent_list[i] sent_vec = tf.add( tf.multiply(tf.exp(self.logsigm_sent), curr_eps), self.mean_sent) if GSM: sent_vec = tf.nn.softmax(sent_vec, axis=1) sent_vec_list.append(sent_vec) self.last_h_sent = 
tf.add_n(sent_vec_list) / self.n_sample self.last_h_sent = tf.reshape(self.last_h_sent, [ self.input_batch_size_sent, self.input_batch_len_sent, self.n_topic ]) if params.use_topic_embedding: if params.use_k_topics > 0: # Masking sentence topic proportion vector top_k_h_sent_values, top_k_h_sent_indices = tf.nn.top_k( self.last_h_sent, k=params.use_k_topics, sorted=False, name='top_k_h_sent') row_numbers_sent = tf.tile(tf.expand_dims( tf.range(0, self.batch_size_sent), 1), [1, params.use_k_topics], name='row_numbers_sent') full_indices_sent = tf.concat([ tf.expand_dims(row_numbers_sent, -1), tf.expand_dims(top_k_h_sent_indices, -1) ], axis=2) full_indices_sent = tf.reshape( full_indices_sent, [-1, 2], name='full_indices_sent') last_h_softmax_sent = tf.scatter_nd( full_indices_sent, tf.reshape( tf.nn.softmax(top_k_h_sent_values, axis=1), [-1]), [self.batch_size_sent, self.n_topic], name='last_h_softmax_sent') else: last_h_softmax_sent = tf.nn.softmax( self.last_h_sent, axis=2, name='last_h_softmax_sent') self.last_h_topic_emb_sent = tf.matmul( last_h_softmax_sent, self.topic_embeddings, name='last_h_topic_emb_sent') if concat_topic_emb_and_prop: self.last_h_topic_emb_sent = tf.concat( [self.last_h_topic_emb_sent, self.last_h_sent], axis=2, name='last_h_topic_emb_sent_concat') #self.objective_TM = self.recons_loss + self.kld #self.objective_TM = tf.add(self.recons_loss, self.kld, name='TM_loss_unnormed') self.final_loss = tf.add(self.recons_loss, self.kld, name='TM_loss_unnormed') self.objective_TM = tf.reduce_mean(self.final_loss) if params.TM_uniqueness_loss: ## TCNLM topic uniqueness loss normed_topic_matrix = self.decoding_matrix / tf.reduce_sum( self.decoding_matrix, axis=1, keepdims=True) l2_normalized_topic_matrix = tf.nn.l2_normalize( normed_topic_matrix, axis=1) cosine_similarity = tf.matmul(l2_normalized_topic_matrix, l2_normalized_topic_matrix, transpose_a=False, transpose_b=True) cosine_distance = tf.subtract(1.0, cosine_similarity) mean_cosine_distance = tf.reduce_mean(cosine_distance) variance = tf.reduce_mean( tf.square(tf.subtract(cosine_distance, mean_cosine_distance))) #uniqueness_loss = mean_cosine_distance - variance uniqueness_loss = -mean_cosine_distance + variance self.objective_TM += params.alpha_uniqueness * uniqueness_loss #self.objective_TM += 0.01 * uniqueness_loss if params.topic_coherence_reg: #E_normalized = W_prior / tf.reduce_sum(W_prior, axis=1, keepdims=True) E_normalized = tf.nn.l2_normalize(W_prior, axis=1, name='E_normalized') #W_normalized = self.decoding_matrix / tf.reduce_sum(self.decoding_matrix, axis=1, keepdims=True) W_normalized = tf.nn.l2_normalize(self.decoding_matrix, axis=1, name='W_normalized') topic_vectors = tf.transpose(tf.matmul(W_normalized, E_normalized), [1, 0], name='topic_vectors') #topic_vectors_normalized = topic_vectors / tf.reduce_sum(topic_vectors, axis=1, name='topic_vectors_normalized') topic_vectors_normalized = tf.nn.l2_normalize( topic_vectors, axis=0, name='topic_vectors_normalized') cos_sim_matrix = tf.transpose(tf.matmul(E_normalized, topic_vectors_normalized), [1, 0], name='cos_sim_matrix') coherence_loss = -tf.reduce_sum(tf.multiply( cos_sim_matrix, W_normalized), name="coherence_loss") self.objective_TM += params.beta_coherence * coherence_loss optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate) #fullvars = tf.trainable_variables() #enc_vars = utils.variable_parser(fullvars, 'TM_encoder') enc_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='TM_encoder') #dec_vars = 
utils.variable_parser(fullvars, 'TM_decoder') dec_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='TM_decoder') enc_grads = tf.gradients(self.objective_TM, enc_vars) dec_grads = tf.gradients(self.objective_TM, dec_vars) self.optim_enc = optimizer.apply_gradients(zip(enc_grads, enc_vars)) self.optim_dec = optimizer.apply_gradients(zip(dec_grads, dec_vars))
def mask_adaptive_logsoftmax(hidden, target, n_token, d_embed, d_proj, cutoffs, params, tie_projs, initializer=None, proj_initializer=None, div_val=1, scope='adaptive_softmax', proj_same_dim=True, return_mean=True, **kwargs): def _logit(x, W, b, proj): y = x if proj is not None: y = tf.einsum('ibd,ed->ibe', y, proj) return tf.einsum('ibd,nd->ibn', y, W) + b params_W, params_projs = params[0], params[1] def _gather_logprob(logprob, target): lp_size = tf.shape(logprob) r = tf.range(lp_size[0]) idx = tf.stack([r, target], 1) return tf.gather_nd(logprob, idx) with tf.variable_scope(scope): if len(cutoffs) == 0: softmax_b = tf.get_variable('bias', [n_token], initializer=tf.zeros_initializer()) output = _logit(hidden, params_W, softmax_b, params_projs) nll = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=target, logits=output) else: cutoff_ends = [0] + cutoffs + [n_token] nll = tf.zeros_like(target, dtype=tf.float32) for i in range(len(cutoff_ends) - 1): with tf.variable_scope('cutoff_{}'.format(i)): l_idx, r_idx = cutoff_ends[i], cutoff_ends[i + 1] mask = (target >= l_idx) & (target < r_idx) mask_idx = tf.where(mask) cur_target = tf.boolean_mask(target, mask) - l_idx cur_d_embed = d_embed // (div_val**i) if div_val == 1: cur_W = params_W[l_idx:r_idx] else: cur_W = params_W[i] cur_b = tf.get_variable('b', [r_idx - l_idx], initializer=tf.zeros_initializer()) if tie_projs[i]: if div_val == 1: cur_proj = params_projs else: cur_proj = params_projs[i] else: if (div_val == 1 or not proj_same_dim) and d_proj == cur_d_embed: cur_proj = None else: cur_proj = tf.get_variable( 'proj', [cur_d_embed, d_proj], initializer=proj_initializer) if i == 0: cluster_W = tf.get_variable( 'cluster_W', [len(cutoffs), d_embed], initializer=tf.zeros_initializer()) cluster_b = tf.get_variable( 'cluster_b', [len(cutoffs)], initializer=tf.zeros_initializer()) cur_W = tf.concat([cur_W, cluster_W], 0) cur_b = tf.concat([cur_b, cluster_b], 0) head_logit = _logit(hidden, cur_W, cur_b, cur_proj) head_logprob = tf.nn.log_softmax(head_logit) cur_head_logprob = tf.boolean_mask(head_logprob, mask) cur_logprob = _gather_logprob(cur_head_logprob, cur_target) else: cur_head_logprob = tf.boolean_mask(head_logprob, mask) cur_hidden = tf.boolean_mask(hidden, mask) tail_logit = tf.squeeze( _logit(cur_hidden[None], cur_W, cur_b, cur_proj), 0) tail_logprob = tf.nn.log_softmax(tail_logit) cur_logprob = ( cur_head_logprob[:, cutoff_ends[1] + i - 1] + _gather_logprob(tail_logprob, cur_target)) nll += tf.scatter_nd(mask_idx, -cur_logprob, tf.to_int64(tf.shape(nll))) if return_mean: nll = tf.reduce_mean(nll) return nll
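The per-cluster losses above are merged with a scatter-accumulate pattern: compute values on a boolean-masked subset, then scatter them back into the full batch at the positions `tf.where` returned. A minimal sketch with toy values:

import tensorflow as tf

target = tf.constant([2, 7, 1])            # token ids
mask = target < 5                          # tokens falling in this cluster
mask_idx = tf.where(mask)                  # [[0], [2]]
cur_logprob = tf.constant([-0.1, -0.3])    # log-probs of the masked targets
nll = tf.zeros_like(target, dtype=tf.float32)
nll += tf.scatter_nd(mask_idx, -cur_logprob, tf.to_int64(tf.shape(nll)))
# nll == [0.1, 0.0, 0.3]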
def call(self, x, padding=None): """Return outputs of the feedforward network. Args: x: tensor with shape [batch_size, length, hidden_size] padding: (optional) If set, the padding values are temporarily removed from x (provided self.allow_pad is set). The padding values are placed back in the output tensor in the same locations. shape [batch_size, length] Returns: Output of the feedforward network. tensor with shape [batch_size, length, hidden_size] """ padding = None if not self.allow_pad else padding # Retrieve dynamically known shapes batch_size = tf.shape(x)[0] length = tf.shape(x)[1] if padding is not None: with tf.name_scope("remove_padding"): # Flatten padding to [batch_size*length] pad_mask = tf.reshape(padding, [-1]) nonpad_ids = tf.to_int32(tf.where(pad_mask < 1e-9)) # Reshape x to [batch_size*length, hidden_size] to remove padding x = tf.reshape(x, [-1, self.hidden_size]) x = tf.gather_nd(x, indices=nonpad_ids) # Reshape x from 2 dimensions to 3 dimensions. x.set_shape([None, self.hidden_size]) x = tf.expand_dims(x, axis=0) # debug #print("in ffn_layer.py x.shape") #print(x.shape) output = self.filter_dense_layer(x) #print("in ffn_layer.py output.shape 1") #print(output.shape) if self.train: output = tf.nn.dropout(output, 1.0 - self.relu_dropout) output = self.output_dense_layer(output) #print("in ffn_layer.py output.shape 2") #print(output.shape) if padding is not None: with tf.name_scope("re_add_padding"): output = tf.squeeze(output, axis=0) output = tf.scatter_nd( indices=nonpad_ids, updates=output, shape=[batch_size * length, self.output_size] ) output = tf.reshape(output, [batch_size, length, self.output_size]) return output
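The remove/re-add padding logic above is a gather_nd/scatter_nd round trip: gather keeps the non-pad rows, and scatter puts the processed rows back in place, leaving zeros at the padded positions. A minimal sketch (toy shapes):

import tensorflow as tf

x = tf.constant([[1., 1.], [2., 2.], [0., 0.], [3., 3.]])  # [batch*length, hidden]
pad_mask = tf.constant([0., 0., 1., 0.])                   # 1 marks padding
nonpad_ids = tf.to_int32(tf.where(pad_mask < 1e-9))
compact = tf.gather_nd(x, nonpad_ids)                      # pads removed
restored = tf.scatter_nd(indices=nonpad_ids, updates=compact, shape=[4, 2])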
def projection(XYZ, K, Extrinsic, H=224, W=224, reuse=False): #[B,N,3],[B,3,3],[B,4,4] """ Apply projection to the point cloud Params: -- XYZ : Point cloud. Tensor [batch_size, point_number, 3] -- K : Camera intrinsic parameters. Tensor [batch_size, 3, 3] -- Extrinsic : Camera extrinsic parameters. Tensor [batch_size, 4, 4] -- H : Downsampled height -- W : Downsampled width Returns: -- newDepth : Depth image. Tensor [batch_size, H, W] -- Cloud_mask: Front (visible) points mask. Tensor [batch_size, point_number] """ XYZ = tf.transpose(XYZ, [0,2,1]) batchSize = tf.shape(XYZ)[0] downscale_H = 224.0/H downscale_W = 224.0/W K = K*np.array([[1.0/downscale_H], [1.0/downscale_W], [1]], dtype=np.float32) N = tf.shape(XYZ)[2] H = tf.constant(H) W = tf.constant(W) bg = pow(2,16) - 1 with tf.variable_scope("transform_render2D") as scope: if reuse: scope.reuse_variables() # ------ use camera calibration to compute new XYZ ------ ones = tf.ones([batchSize, 1, N]) XYZ = tf.concat([XYZ,ones], axis=1)# [B,4,N] XYZtemp = tf.matmul(Extrinsic, XYZ)# [B,4,N] = [B,4,4]*[B,4,N] XYZtemp = XYZtemp[:,:3,:] XYZnew = tf.matmul(K, XYZtemp)# [B,3,N] = [B,3,3]*[B,3,N] XYZnew = tf.transpose(XYZnew, [0,2,1]) # [B,N,3] eps = 1e-12 X = tf.reshape(tf.to_int32(tf.round(tf.div(XYZnew[:,:,0], XYZnew[:,:,2] + eps))), [-1]) #[B*N,] Y = tf.reshape(tf.to_int32(tf.round(tf.div(XYZnew[:,:,1], XYZnew[:,:,2] + eps))), [-1]) #[B*N,] YX = tf.stack([Y,X], axis=1) #[B*N,2] Batch = tf.range(0, batchSize, 1) Batch = tf.tile(tf.expand_dims(Batch, axis=1),[1,N]) Batch = tf.reshape(Batch, [batchSize*N, 1]) scatterIndex = tf.concat([Batch, YX], axis=1) #[B*N,3] scatterZ = tf.reshape(XYZnew[:,:,2],[-1]) #[B*N,] # ------ delete invalid points ------ _, Y_Index, X_Index = tf.split(scatterIndex, 3, axis=1) #[B*N,1] X_Index = tf.squeeze(X_Index) Y_Index = tf.squeeze(Y_Index) Cloud_mask_pre = tf.range(0,batchSize*N,1) mask_inside = (X_Index >= 0)&(X_Index < W)&(Y_Index >= 0)&(Y_Index < H)&(scatterZ >=0)&(scatterZ <=10) mask_inside.set_shape([None]) Cloud_mask_pre = tf.boolean_mask(Cloud_mask_pre,mask_inside) scatterIndex = tf.boolean_mask(scatterIndex, mask_inside) scatterZ = depthToint16(tf.boolean_mask(scatterZ, mask_inside)) #[B*N,] # ------ select front (visible) points ------ seg_id = scatterIndex[:,0]*H*W + scatterIndex[:,1]*W + scatterIndex[:,2] seg_min = tf.unsorted_segment_max(-scatterZ, seg_id, batchSize*H*W) #[B*H*W,] seg_mask = tf.gather_nd(-seg_min, tf.expand_dims(seg_id, axis=1)) #[B*N,] mask = ((scatterZ - seg_mask) <= 0) Cloud_mask_pre = tf.boolean_mask(Cloud_mask_pre, mask) scatterIndex = tf.boolean_mask(scatterIndex, mask) # ------ compute depth images ------ scatterZ = tf.boolean_mask(scatterZ, mask) scatterZ = scatterZ - bg newDepth = tf.scatter_nd(scatterIndex, scatterZ, shape=[batchSize, H, W]) #[B,H,W] newDepth = newDepth + bg # ------ compute front mask given extrinsic ------ Cloud_mask = tf.scatter_nd(tf.expand_dims(Cloud_mask_pre, axis=1), tf.ones_like(Cloud_mask_pre), shape=[batchSize*N]) Cloud_mask = (Cloud_mask > 0) Cloud_mask = tf.reshape(Cloud_mask, [batchSize,N]) return newDepth, Cloud_mask
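The visibility test above is a z-buffer built from segment ops: for every pixel (segment), the closest depth wins, obtained by maximizing negated depths. A toy sketch of just that step:

import tensorflow as tf

seg_id = tf.constant([0, 0, 2])            # flat pixel index of each point
scatterZ = tf.constant([5.0, 3.0, 7.0])    # per-point depth
seg_min = tf.unsorted_segment_max(-scatterZ, seg_id, 4)
nearest = tf.gather(-seg_min, seg_id)      # min depth at each point's pixel
visible = (scatterZ - nearest) <= 0        # [False, True, True]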
def call(self, seq1, seq2): """Creates targets for pairwise sequence alignment task from proj. MSA rows. Given a pair of projected rows from an MSA (i.e., with positions at which both rows have a gap removed), the ground-truth alignment targets are obtained by: 1) Each position in the projected MSA is classified as _MATCH, _GAP_IN_X or _GAP_IN_Y. 2) The positions of match states are retrieved, as well as the starting position of each sequence in the ground-truth (local) alignment. 3) Positions before the first match state or after the last match state are discarded, as these do not belong to the local ground-truth alignment. 4) For each pair of consecutive match states, where consecutive here is to be understood when ignoring non-match states, it is checked whether there are BOTH _GAP_IN_X and _GAP_IN_Y states in between. 5) For each pair of consecutive match states with both _GAP_IN_X and _GAP_IN_Y states in between, these states are canonically sorted to ensure all _GAP_IN_X states occur first, being followed by all _GAP_IN_Y states. 6) We encode transitions, that is, ordered tuples (s_old, s_new) of states using the 9 hidden state model described in `look_up` (cf. `init`), with initial transition (_START, _MATCH) encoded as in `self._init_trans`. 7) Given the new sequence of states, we reconstruct the positions in each sequence where those states would occur. 8) Finally, optionally, if any special tokens are to be prepended to the sequences after this transformation, the ground-truth alignment targets will be adjusted accordingly. Note, however, that tokens being appended require no further modification. Args: seq1: A tf.Tensor<int>[len], representing the first proj. row of the MSA. seq2: A tf.Tensor<int>[len], representing the second proj. row of the MSA. Returns: A tf.Tensor<int>[3, tar_len] with three stacked tf.Tensor<int>[tar_len], pos1, pos2 and enc_trans, such that (pos1[i], pos2[i], enc_trans[i]) represents the i-th transition in the ground-truth alignment. For example, (pos1[0], pos2[0], enc_trans[0]) = (1, 1, 3) would represent that the first transition in the ground-truth alignment is from the start state _START to the _MATCH(1,1) state whereas (pos1[2], pos2[2], enc_trans[2]) = (2, 5, 4) would represent that the third transition in the ground-truth alignment is from the match state _MATCH(2, 4) to the gap in X state _GAP_IN_X(2, 5). Both pos1 and pos2 use one-based indexing, reserving the use of the value zero for padding. In rare cases where the sequence pair has no aligned characters, tar_len will be zero.
""" keep_indices1 = tf.cast( self._vocab.compute_mask(seq1, self._gap_token), tf.int32) keep_indices2 = tf.cast( self._vocab.compute_mask(seq2, self._gap_token), tf.int32) states = keep_indices1 - keep_indices2 m_states = tf.cast(tf.reshape(tf.where(states == self._MATCH), [-1]), tf.int32) n_matches = len(m_states) if n_matches == 0: return tf.zeros([3, 0], tf.int32) start, end = m_states[0], m_states[-1] offset1 = tf.reduce_sum(keep_indices1[:start]) offset2 = start - offset1 offset1 += self._n_prepend_tokens offset2 += self._n_prepend_tokens states = states[start:end + 1] keep_indices1 = keep_indices1[start:end + 1] keep_indices2 = keep_indices2[start:end + 1] m_states -= start segment_ids = tf.cumsum( tf.scatter_nd(m_states[1:, tf.newaxis], tf.ones(n_matches - 1, dtype=tf.int32), shape=[len(states)])) aux1 = tf.math.segment_sum(1 - keep_indices1, segment_ids)[:-1] aux2 = tf.math.segment_max(1 - keep_indices2, segment_ids)[:-1] gap_gap_trans_m_states_indices = tf.reshape(tf.where(aux1 * aux2), [-1]) if len(gap_gap_trans_m_states_indices) > 0: # pylint: disable=g-explicit-length-test for idx in gap_gap_trans_m_states_indices: s_i, e_i = m_states[idx] + 1, m_states[idx + 1] m_i = s_i + aux1[idx] v_x = tf.fill([aux1[idx]], self._GAP_IN_X) v_y = tf.fill([e_i - m_i], self._GAP_IN_Y) states = tf.raw_ops.TensorStridedSliceUpdate(input=states, begin=[s_i], end=[m_i], strides=[1], value=v_x) states = tf.raw_ops.TensorStridedSliceUpdate(input=states, begin=[m_i], end=[e_i], strides=[1], value=v_y) # Builds transitions. enc_trans = tf.gather(self._trans_encoder, self._hash_fn(states[:-1], states[1:])) enc_trans = tf.concat([self._init_trans, enc_trans], 0) # Positions such that (pos1[i], pos2[i]) for i = 0, ..., align_len - 1 # describes the alignment "path". pos1 = offset1 + tf.cumsum(tf.cast(states >= self._MATCH, tf.int32)) pos2 = offset2 + tf.cumsum(tf.cast(states <= self._MATCH, tf.int32)) return tf.stack([pos1, pos2, enc_trans])
def beam_decode(self, init_h=None, encoder_outputs=None, input_valid_length=None, decode=False): """ Args: encoder_outputs (Variable, FloatTensor): [batch_size, source_length, hidden_size] input_valid_length (Variable, LongTensor): [batch_size] (optional) init_h (variable, FloatTensor): [batch_size, hidden_size] (optional) Return: out : [batch_size, seq_len] """ batch_size = self.batch_size(h=init_h) # [batch_size x beam_size] x = self.init_token(batch_size * self.beam_size, SOS_ID) # [num_layers, batch_size x beam_size, hidden_size] h = tf.tile(self.init_h(batch_size, hidden=init_h), [1, self.beam_size, 1]) # batch_position [batch_size] # [0, beam_size, beam_size * 2, .., beam_size * (batch_size-1)] # Points where batch starts in [batch_size x beam_size] tensors # Ex. position_idx[5]: when 5-th batch starts batch_position = tf.range(0, batch_size, dtype=tf.int32) * self.beam_size # Initialize scores of sequence # [batch_size x beam_size] # Ex. batch_size: 5, beam_size: 3 # [0, -inf, -inf, 0, -inf, -inf, 0, -inf, -inf, 0, -inf, -inf, 0, -inf, -inf] indice = tf.reshape(batch_position, [-1, 1]) shape = tf.constant([batch_size * self.beam_size]) updates = tf.constant([1] * batch_size) score = tf.cast((tf.scatter_nd(indice, updates, shape) - 1), tf.float32) * float(9999999999) # Initialize Beam that stores decisions for backtracking beam = Beam(batch_size, self.hidden_size, self.vocab_size, self.beam_size, self.max_unroll, batch_position) for i in range(self.max_unroll): # x: [batch_size x beam_size]; (token index) # => # out: [batch_size x beam_size, vocab_size] # h: [num_layers, batch_size x beam_size, hidden_size] out, h = self.forward_step(x, h, encoder_outputs=encoder_outputs, input_valid_length=input_valid_length) # log_prob: [batch_size x beam_size, vocab_size] # use log-probabilities so that cumulative scores add up correctly log_prob = tf.nn.log_softmax(out, axis=1) # [batch_size x beam_size] # => [batch_size x beam_size, vocab_size] score = tf.reshape(score, [-1, 1]) + log_prob # Select `beam size` transitions out of `vocab size` combinations # [batch_size x beam_size, vocab_size] # => [batch_size, beam_size x vocab_size] # Cutoff and retain candidates with top-k scores # score: [batch_size, beam_size] # top_k_idx: [batch_size, beam_size] # each element of top_k_idx [0 ~ beam x vocab) score, top_k_idx = tf.math.top_k( tf.reshape(score, [batch_size, -1]), self.beam_size) # Get token ids as the remainder of top_k_idx divided by vocab_size # Each element is among [0, vocab_size) # Ex. index of token 3 in beam 4: (4 * vocab_size) + 3 => 3 # x: [batch_size x beam_size] x = tf.reshape((top_k_idx % self.vocab_size), [-1]) # top-k-pointer [batch_size x beam_size] # Points top-k beam that scored best at current step # Later used as back-pointer at backtracking # Each element is beam index: 0 ~ beam_size # + position index: 0 ~ beam_size x (batch_size-1) beam_idx = tf.cast((top_k_idx / self.vocab_size), tf.int32) # [batch_size, beam_size] top_k_pointer = tf.reshape( (beam_idx + tf.expand_dims(batch_position, 1)), [-1]) # Select next h (size doesn't change) # [num_layers, batch_size * beam_size, hidden_size] h = tf.gather(h, top_k_pointer, axis=1) # Update sequence scores at beam beam.update(score, top_k_pointer, x) # , h) # Erase scores for EOS so that they are not expanded # [batch_size, beam_size] eos_idx = tf.reshape(tf.math.equal(x, EOS_ID), [batch_size, self.beam_size]) if tf.where(eos_idx).shape[0] > 0: score = tf.where(eos_idx, -float('inf'), score) # prediction ([batch, k, max_unroll]) # A list of Tensors containing predicted sequence # final_score [batch, k] # A list containing the final scores for all top-k sequences # length [batch, k] # A list specifying the length of each sequence in the top-k candidates prediction, final_score, length = beam.backtrack() return prediction, final_score, length
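The score initialization above is itself a scatter trick: ones scattered at each batch's first beam slot, shifted by -1 and scaled, give 0 for that slot and a large negative number everywhere else. A standalone sketch with assumed sizes:

import tensorflow as tf

batch_size, beam_size = 2, 3
batch_position = tf.range(0, batch_size, dtype=tf.int32) * beam_size  # [0, 3]
score = tf.cast(
    tf.scatter_nd(tf.reshape(batch_position, [-1, 1]),
                  tf.ones([batch_size], tf.int32),
                  [batch_size * beam_size]) - 1,
    tf.float32) * 9999999999.
# score ~= [0, -1e10, -1e10, 0, -1e10, -1e10]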
def measure_homodyne(self, phi, mode, select=None, **kwargs): """ Measures 'modes' in the basis of quadrature eigenstates (rotated by phi) and updates remaining modes conditioned on this result. After measurement, the states in 'modes' are reset to the vacuum. Args: phi (float): phase angle of quadrature to measure mode (int): which mode to measure. select (float): user-specified measurement value (used instead of random sampling) **kwargs: can be used to pass a session or a feed_dict. Otherwise a temporary session and no feed_dict will be used. Returns: The measured value (or a list of measured values when running in batch mode). """ if not isinstance(mode, int): raise ValueError("Specified modes are not valid.") else: if mode < 0 or mode >= self._num_modes: raise ValueError("Specified modes are not valid.") m_omega_over_hbar = 1 / self._hbar if self._state_is_pure: mode_size = 1 else: mode_size = 2 if self._batched: batch_offset = 1 batch_size = self._batch_size else: batch_offset = 0 batch_size = 1 with self.graph.as_default(): phi = tf.cast(phi, ops.def_type) phi = self._maybe_batch(phi) evaluate_results, session, feed_dict, close_session = ops._check_for_eval( kwargs) if select is not None: meas_result = self._maybe_batch(select) homodyne_sample = tf.cast(meas_result, tf.float64, name="Meas_result") else: # create reduced state on mode to be measured reduced_state = ops.reduced_density_matrix( self._state, mode, self._state_is_pure, self._batched) # rotate to homodyne basis # pylint: disable=invalid-unary-operand-type reduced_state = ops.phase_shifter(-phi, 0, reduced_state, self._cutoff_dim, False, self._batched) # create pdf for homodyne measurement # We use the following quadrature wavefunction for the Fock states: # \psi_n(x) = 1/sqrt[2^n n!](\frac{m \omega}{\pi \hbar})^{1/4} # \exp{-\frac{m \omega}{2\hbar} x^2} H_n(\sqrt{\frac{m \omega}{\pi}} x) # where H_n(x) is the (physicists) nth Hermite polynomial if "max" in kwargs: q_mag = kwargs["max"] else: q_mag = 10 if "num_bins" in kwargs: num_bins = kwargs["num_bins"] else: num_bins = 100000 if "q_tensor" in self._cache: # use cached q_tensor q_tensor = self._cache["q_tensor"] else: q_tensor = tf.constant(np.linspace(-q_mag, q_mag, num_bins)) self._cache["q_tensor"] = q_tensor x = np.sqrt(m_omega_over_hbar) * q_tensor if "hermite_polys" in self._cache: # use cached polynomials hermite_polys = self._cache["hermite_polys"] else: H0 = 0 * x + 1.0 H1 = 2 * x hermite_polys = [H0, H1] Hn = H1 Hn_m1 = H0 for n in range(1, self._cutoff_dim - 1): Hn_p1 = ops.H_n_plus_1(Hn, Hn_m1, n, x) hermite_polys.append(Hn_p1) Hn_m1 = Hn Hn = Hn_p1 self._cache["hermite_polys"] = hermite_polys number_state_indices = [ k for k in product(range(self._cutoff_dim), repeat=2) ] terms = [ 1 / np.sqrt(2**n * factorial(n) * 2**m * factorial(m)) * hermite_polys[n] * hermite_polys[m] for n, m in number_state_indices ] hermite_matrix = tf.scatter_nd( number_state_indices, terms, [self._cutoff_dim, self._cutoff_dim, num_bins]) hermite_terms = tf.multiply( tf.expand_dims(reduced_state, -1), tf.expand_dims(tf.cast(hermite_matrix, ops.def_type), 0)) rho_dist = tf.cast(tf.reduce_sum(hermite_terms, axis=[1, 2]), tf.float64) \ * (m_omega_over_hbar / np.pi) ** 0.5 \ * tf.exp(- x ** 2) \ * (q_tensor[1] - q_tensor[0]) # Delta_q for normalization (only works if the bins are equally spaced) # use tf.multinomial to sample logprobs = tf.log(rho_dist) samples_idx = tf.multinomial(logprobs, 1) homodyne_sample = tf.gather(q_tensor, samples_idx) homodyne_sample = 
tf.squeeze(homodyne_sample) if evaluate_results: meas_result = homodyne_sample.eval(feed_dict, session) if close_session: session.close() else: meas_result = tf.identity(homodyne_sample, name="Meas_result") # project remaining modes into conditional state if self._num_modes == 1: # in this case, all modes were measured and we put everything into vacuum self.reset(pure=self._state_is_pure) else: # only some modes were measured: put unmeasured modes in conditional state, while resetting measured modes to vacuum inf_squeezed_vac = tf.convert_to_tensor( [(-0.5)**(m // 2) * np.sqrt(factorial(m)) / factorial(m // 2) if m % 2 == 0 else 0. for m in range(self._cutoff_dim)], dtype=ops.def_type) if self._batched: inf_squeezed_vac = tf.tile( tf.expand_dims(inf_squeezed_vac, 0), [batch_size, 1]) displacement_size = tf.stack( tf.convert_to_tensor(meas_result * np.sqrt(m_omega_over_hbar / 2))) quad_eigenstate = ops.displacement(displacement_size, 0, inf_squeezed_vac, self._cutoff_dim, True, self._batched) homodyne_eigenstate = ops.phase_shifter( phi, 0, quad_eigenstate, self._cutoff_dim, True, self._batched) conditional_state = ops.conditional_state( self._state, homodyne_eigenstate, mode, self._state_is_pure, batched=self._batched) # normalize if self._state_is_pure: norm = tf.norm(tf.reshape(conditional_state, [batch_size, -1]), axis=1) else: # calculate norm of conditional_state # cheap hack since tensorflow doesn't allow einsum equation for trace: r = conditional_state for _ in range(self._num_modes - 2): r = ops.partial_trace(r, 0, False, self._batched) norm = tf.trace(r) # for broadcasting norm_reshape = [1] * len( conditional_state.shape[batch_offset:]) if self._batched: norm_reshape = [self._batch_size] + norm_reshape normalized_conditional_state = conditional_state / tf.reshape( norm, norm_reshape) # reset measured modes into vacuum meas_mode_vac = self._single_mode_pure_vac if self._state_is_pure else self._single_mode_mixed_vac batch_index = indices[:batch_offset] meas_mode_indices = indices[batch_offset:batch_offset + mode_size] conditional_indices = indices[batch_offset + mode_size:batch_offset + mode_size * self._num_modes] eqn_lhs = batch_index + meas_mode_indices + "," + batch_index + conditional_indices eqn_rhs = '' meas_ctr = 0 cond_ctr = 0 for m in range(self._num_modes): if m == mode: # use measured_indices eqn_rhs += meas_mode_indices[mode_size * meas_ctr:mode_size * (meas_ctr + 1)] meas_ctr += 1 else: # use conditional indices eqn_rhs += conditional_indices[mode_size * cond_ctr:mode_size * (cond_ctr + 1)] cond_ctr += 1 eqn = eqn_lhs + "->" + batch_index + eqn_rhs new_state = tf.einsum(eqn, meas_mode_vac, normalized_conditional_state) self._update_state(new_state) return meas_result
def tensormol_acsf(xyzs, Zs, elements, element_pairs, radial_cutoff, angular_cutoff, radial_rs, angular_rs, theta_s, zeta, eta): """ This function uses the tensormol atom centred symmetry functions. :param xyzs: tensor of shape (n_samples, n_atoms, 3) :param Zs: tensor of shape (n_samples, n_atoms) :param elements: np.array of shape (n_elements,) :param element_pairs: np.array of shape (n_elementpairs, 2) :param radial_cutoff: scalar float :param angular_cutoff: scalar float :param radial_rs: np.array of shape (n_rad_rs,) :param angular_rs: np.array of shape (n_ang_rs,) :param theta_s: np.array of shape (n_thetas,) :param zeta: scalar float :param eta: scalar float :return: a tf tensor of shape (n_samples, n_atoms, n_rad_rs * n_elements + n_ang_rs * n_thetas * n_elementpairs) """ # The data with tf.name_scope("Params"): elements = tf.constant(elements, dtype=tf.int32) element_pairs = tf.constant(np.flip(element_pairs, axis=1), dtype=tf.int32) radial_cutoff = tf.constant(radial_cutoff, dtype=tf.float32) angular_cutoff = tf.constant(angular_cutoff, dtype=tf.float32) radial_rs = tf.constant(radial_rs, dtype=tf.float32) angular_rs = tf.constant(angular_rs, dtype=tf.float32) theta_s = tf.constant(theta_s, dtype=tf.float32) zeta = tf.constant(zeta, dtype=tf.float32) eta = tf.constant(eta, dtype=tf.float32) num_molecules = Zs.get_shape().as_list()[0] num_elements = elements.get_shape().as_list()[0] num_element_pairs = element_pairs.get_shape().as_list()[0] with tf.name_scope("Radial"): radial_embedding, pair_indices_rad, pair_elements = tf_symmetry_functions_radial_grid( xyzs, Zs, radial_cutoff, radial_rs, eta) with tf.name_scope("Angular"): angular_embedding, triples_indices, triples_element, sorted_triples_element_pairs = tf_symmetry_function_angular_grid( xyzs, Zs, angular_cutoff, angular_rs, theta_s, zeta, eta) with tf.name_scope("Sum_rad"): pair_element_indices = tf.cast( tf.where( tf.equal(tf.expand_dims(pair_elements[:, 1], axis=-1), tf.expand_dims(elements, axis=0))), tf.int32)[:, 1] triples_elements_indices = tf.cast( tf.where( tf.reduce_all(tf.equal( tf.expand_dims(sorted_triples_element_pairs, axis=-2), element_pairs), axis=-1)), tf.int32)[:, 1] radial_scatter_indices = tf.concat( [pair_indices_rad, tf.expand_dims(pair_element_indices, axis=1)], axis=1) angular_scatter_indices = tf.concat([ triples_indices, tf.expand_dims(triples_elements_indices, axis=1) ], axis=1) radial_molecule_embeddings = tf.dynamic_partition( radial_embedding, pair_indices_rad[:, 0], num_molecules) radial_atom_indices = tf.dynamic_partition( radial_scatter_indices[:, 1:], pair_indices_rad[:, 0], num_molecules) angular_molecule_embeddings = tf.dynamic_partition( angular_embedding, triples_indices[:, 0], num_molecules) angular_atom_indices = tf.dynamic_partition( angular_scatter_indices[:, 1:], triples_indices[:, 0], num_molecules) with tf.name_scope("Sum_ang"): embeddings = [] mol_atom_indices = [] for molecule in range(num_molecules): atom_indices = tf.cast(tf.where(tf.not_equal(Zs[molecule], 0)), tf.int32) molecule_atom_elements = tf.gather_nd(Zs[molecule], atom_indices) num_atoms = tf.shape(molecule_atom_elements)[0] radial_atom_embeddings = tf.reshape( tf.reduce_sum(tf.scatter_nd( radial_atom_indices[molecule], radial_molecule_embeddings[molecule], [ num_atoms, num_atoms, num_elements, tf.shape(radial_rs)[0] ]), axis=1), [num_atoms, -1]) angular_atom_embeddings = tf.reshape( tf.reduce_sum(tf.scatter_nd( angular_atom_indices[molecule], angular_molecule_embeddings[molecule], [ num_atoms, num_atoms, num_atoms, 
num_element_pairs, tf.shape(angular_rs)[0] * tf.shape(theta_s)[0] ]), axis=[1, 2]), [num_atoms, -1]) embeddings.append( tf.concat([radial_atom_embeddings, angular_atom_embeddings], axis=1)) mol_atom_indices.append( tf.concat([tf.fill([num_atoms, 1], molecule), atom_indices], axis=1)) embeddings = tf.concat(embeddings, axis=0) mol_atom_indices = tf.concat(mol_atom_indices, axis=0) atom_Zs = tf.cast(tf.gather_nd(Zs, tf.where(tf.not_equal(Zs, 0))), dtype=tf.int32) atom_Z_indices = tf.cast( tf.where( tf.equal(tf.expand_dims(atom_Zs, axis=1), tf.expand_dims(elements, axis=0)))[:, 1], tf.int32) with tf.name_scope("Result"): element_embeddings = tf.dynamic_partition(embeddings, atom_Z_indices, num_elements) mol_indices = tf.dynamic_partition(mol_atom_indices, atom_Z_indices, num_elements) return embeddings
def render_nd_bboxes_tf_spreading(elems, target_shape, ndim=2): """ elems: tensor of size [..., n_boxes, 2*ndim + val_dim], where in the last dimension, there are packed edge coordinates and values (of val_dim) to be filled in the specified box. target_shape: list/tuple of ndim entries. returns: rendered image of size [elems(...), target_shape..., val_dim] ('elems(...)' usually means batch_size) """ assert_shape_ndim = tf.Assert(tf.equal(tf.size(target_shape), ndim), [target_shape]) assert_nonempty_data = tf.Assert(tf.greater(tf.shape(elems)[-1], 2 * ndim), [elems]) with tf.control_dependencies([assert_shape_ndim, assert_nonempty_data]): """ +1 ...... -1 ++++++ ++++++ ........... ...... ++++++ ........... -> ...... -> ++++++ ........... ------ ++++++ -1 +1 in 3D there must be another wall of minuses, looking like this: - + ..... + - so when indexing [0, 1] to ltrb... pluses go where there is an even number of 0s, minuses where it is odd. """ el_ndim = len(elems.shape) # we do not access this property at tensorflow runtime, but at 'compile time', # because the number of dimensions should be known statically assert el_ndim >= 2 and el_ndim <= 3, "elements should be in the form of [batch, n, coordinates] or [n, " \ "coordinates]" if el_ndim == 3: # we use batch_size dimension also! bboxes_per_batch = tf.shape(elems)[1] batch_size = tf.shape(elems)[ 0] # should be the same as image_input.shape[0] index_to_batch = tf.tile(tf.expand_dims(tf.range(batch_size), -1), (1, bboxes_per_batch)) index_to_batch = tf.reshape(index_to_batch, (-1, 1)) else: index_to_batch = None val_vector_size = tf.shape(elems)[-1] - 2 * ndim corner_ids = list(itertools.product([0, 1], repeat=ndim)) corners_lists = [] corners_values = [] for corner in corner_ids: plus = sum(corner) % 2 == 0 id_from_corner = [ i + ndim * c for i, c in enumerate(corner) ] # indexes a corner into [left, top, right, bottom] notation corner_coord = tf.gather(elems[..., 0:2 * ndim], id_from_corner, axis=-1) corner_value = elems[..., 2 * ndim:] * ( 1 if plus else -1) # last dimension is == val_vector_size if index_to_batch is not None: # if the operation is called in batches, remember to reshape it all into one long list for scatter_nd # and add (concatenate) the batch ids corner_coord = tf.concat( [index_to_batch, tf.reshape(corner_coord, (-1, 2))], axis=-1) corner_value = tf.reshape(corner_value, (-1, val_vector_size)) corners_lists.append(corner_coord) corners_values.append(corner_value) indices = tf.concat(corners_lists, axis=0) updates = tf.concat(corners_values, axis=0) shape = tf.concat( [tf.shape(elems)[:-2], target_shape, [val_vector_size]], axis=0) dense_orig = tf.scatter_nd( indices, updates, shape=shape, ) dense = dense_orig for dim in range(ndim): # we want to start from the axis before the last one. The last one is the value dimension, and # the first dimensions hidden in the '...' might be the batched dimensions dense = tf.cumsum(dense, axis=-2 - dim, exclusive=False, reverse=False, name=None) return dense
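The +/- corner scatter followed by `tf.cumsum` is the integral-image trick in reverse; a 1-D toy version (numbers are made up) makes it easy to see:

import tensorflow as tf

start, end, v = 2, 5, 1.0
edges = tf.scatter_nd([[start], [end + 1]], [v, -v], [10])
filled = tf.cumsum(edges)   # [0, 0, 1, 1, 1, 1, 0, 0, 0, 0]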
import os

import tensorflow as tf
from tensorflow.contrib import slim


def model_fn(features, labels, mode, params):
    is_training = mode == tf.estimator.ModeKeys.TRAIN

    # Inputs
    tokens = features['features']  # (N, L)
    token_lengths = features['feature_length']  # (N,)
    sequence_mask = tf.sequence_mask(maxlen=tf.shape(tokens)[1],
                                     lengths=token_lengths)  # (N, L)
    n = tf.shape(tokens)[0]
    L = tf.shape(tokens)[1]
    with tf.control_dependencies([
        tf.assert_greater_equal(params.flat_length, token_lengths,
                                message="Tokens longer than tree size"),
        tf.assert_greater(vocab_size, tokens, message="Tokens larger than vocab"),
        tf.assert_greater_equal(tokens, 0, message="Tokens less than 0")
    ]):
        tokens = tf.identity(tokens)

    if params.l2 > 0:
        weights_regularizer = slim.l2_regularizer(params.l2)
    else:
        weights_regularizer = None

    # Encoder
    mu_t, logsigma_t = vae_flat_encoder_simple(
        tokens=tokens,
        token_lengths=token_lengths,
        vocab_size=vocab_size,
        params=params,
        n=n,
        weights_regularizer=weights_regularizer
    )  # (L, N, D)
    mu = tf.transpose(mu_t, (1, 0, 2))  # (N, L, D)
    logsigma = tf.transpose(logsigma_t, (1, 0, 2))  # (N, L, D)

    # Sampling
    # (cast to int32 so the scatter_nd calls below get indices whose dtype matches
    # their shape argument, which is built from the int32 tensors n and L)
    idx = tf.to_int32(tf.where(sequence_mask))
    with tf.name_scope("kl"):
        selected_mu = tf.gather_nd(params=mu, indices=idx)
        selected_logsigma = tf.gather_nd(params=logsigma, indices=idx)
        latent_sample_values, latent_prior_sample_values = kl(
            mu=selected_mu,
            logsigma=selected_logsigma,
            params=params,
            n=n)
        latent_sample = tf.scatter_nd(
            updates=latent_sample_values,
            indices=idx,
            shape=(n, L, latent_sample_values.shape[-1].value)
        )  # (N, L, D)
        latent_prior_sample = tf.scatter_nd(
            updates=latent_prior_sample_values,
            indices=idx,
            shape=(n, L, latent_prior_sample_values.shape[-1].value)
        )  # (N, L, D)

    # Decoder
    with tf.variable_scope('vae_decoder') as decoder_scope:
        logits, penalty = vae_decoder_dag(
            latent=latent_sample,
            vocab_size=vocab_size,
            sequence_lengths=token_lengths,
            params=params,
            weights_regularizer=weights_regularizer,
            n=n,
            is_training=is_training
        )
    with tf.name_scope("dag_penalty"):
        penalty_scale = get_penalty_scale_logistic(params)
        dag_penalty_raw = tf.reduce_mean(tf.square(penalty))
        weighted_dag_penalty = penalty_scale * dag_penalty_raw
        tf.losses.add_loss(loss=weighted_dag_penalty,
                           loss_collection=tf.GraphKeys.REGULARIZATION_LOSSES)
        tf.summary.scalar('dag_penalty_scale', penalty_scale)
        tf.summary.scalar('dag_penalty_raw', dag_penalty_raw)
        tf.summary.scalar('dag_penalty_weighted', weighted_dag_penalty)

    # Loss calculation
    logits_values = tf.gather_nd(params=logits, indices=idx)
    labels_values = tf.gather_nd(params=tokens, indices=idx)
    onehot_labels_values = tf.one_hot(indices=labels_values, depth=vocab_size)
    loss_values = tf.losses.softmax_cross_entropy(
        onehot_labels=onehot_labels_values,
        logits=logits_values,
        reduction=tf.losses.Reduction.NONE,
        loss_collection=None
    )
    loss_arr = tf.scatter_nd(updates=loss_values, indices=idx, shape=(n, L))
    loss_n = tf.reduce_sum(loss_arr, axis=-1)
    loss = tf.reduce_mean(loss_n)
    tf.losses.add_loss(loss)
    tf.summary.scalar("softmax_cross_entropy", loss)
    total_loss = tf.losses.get_total_loss()

    # Generated data
    with tf.variable_scope(decoder_scope, reuse=True):
        glogits, _ = vae_decoder_dag(
            latent=latent_prior_sample,
            vocab_size=vocab_size,
            sequence_lengths=token_lengths,
            params=params,
            weights_regularizer=weights_regularizer,
            n=n,
            is_training=is_training
        )

    # Hooks
    autoencode_hook = DAGHook(
        logits=logits,
        true=tokens,
        vocab=vocab,
        path=os.path.join(run_config.model_dir, "autoencoded",
                          "autoencoded-{:08d}.csv"),
        name="Autoencoded",
        idx=idx
    )
    generate_hook = DAGHook(
        logits=glogits,
        true=tokens,
        vocab=vocab,
        path=os.path.join(run_config.model_dir, "generated",
                          "generated-{:08d}.csv"),
        name="Generated",
        idx=idx
    )
    evaluation_hooks = [autoencode_hook, generate_hook]
    # tf.summary.scalar('model_total_loss', total_loss)

    # Train
    optimizer = tf.train.AdamOptimizer(params.lr)
    train_op = slim.learning.create_train_op(
        total_loss, optimizer, clip_gradient_norm=params.clip_gradient_norm)
    eval_metric_ops = {
        'cross_entropy_eval': tf.metrics.mean(loss_n),
        'token_lengths_eval': tf.metrics.mean(token_lengths)
    }
    return tf.estimator.EstimatorSpec(
        mode=mode,
        loss=total_loss,
        eval_metric_ops=eval_metric_ops,
        evaluation_hooks=evaluation_hooks,
        train_op=train_op)
TF_B_map = tf.constant(np.squeeze(B_map))
OPD_mask_flat = tf.Variable(
    np.reshape(OPD_mask, OPD_mask.shape[0] * OPD_mask.shape[1]))

# Placeholders for the learning rate and regularization parameters
TF_lr = tf.placeholder(tf.float32, shape=[])
TF_lambda_TV = tf.placeholder(tf.float32, shape=[])
TF_epsC = tf.placeholder(tf.float32, shape=[])

# This is the MATLAB reconstruction (minimum norm)
# TODO: We want to compute this in Python too!
TF_opd = tf.Variable(myinitopd)

# We only want to update the inner part of the mask (where OPD_mask is greater than 0)
updates = tf.boolean_mask(TF_opd, OPD_mask > 0)  # TF_opd * OPD_mask
indexes = tf.cast(tf.where(OPD_mask > 0), tf.int32)
TF_opd_masked = tf.scatter_nd(indexes, updates, tf.shape(OPD_mask))

# Compute the "guess" based on the variable OPD
TF_R_guess = tf_jammin.polyeval(TF_opd_masked, np.squeeze(R_fit_func.coeffs))
TF_G_guess = tf_jammin.polyeval(TF_opd_masked, np.squeeze(G_fit_func.coeffs))
TF_B_guess = tf_jammin.polyeval(TF_opd_masked, np.squeeze(B_fit_func.coeffs))

''' Formulate cost function 1: '''
# We want a smoothness constraint on the result coming from the L2 minimization,
# which is done by adding a TV regularizer on the indexed image. The term below
# reduces the L2 distance between the RGB pixels and the corresponding entries
# in the RGB-OPD lookup table.
TF_mySqrError = tf.reduce_mean(
    (TF_R_guess - TF_R_exp) ** 2 +
    (TF_G_guess - TF_G_exp) ** 2 +
    (TF_B_guess - TF_B_exp) ** 2)
# In order to have a smooth phase without discontinuities, we want a small TV norm.
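# Sketch of the mask-and-rebuild pattern above (an illustration, not part of the
# original script; assumes TensorFlow 1.x): extract the entries selected by a mask
# with boolean_mask, then scatter them back onto a zero background, so only the
# masked region is carried through the computation.
import numpy as np
import tensorflow as tf

mask = np.array([[0, 1], [1, 0]])
full = tf.constant([[10., 20.], [30., 40.]])
vals = tf.boolean_mask(full, mask > 0)                  # masked entries, flattened
idx = tf.cast(tf.where(mask > 0), tf.int32)             # their 2D coordinates
masked_only = tf.scatter_nd(idx, vals, tf.shape(full))  # zeros everywhere else
with tf.Session() as sess:
    print(sess.run(masked_only))  # [[ 0. 20.] [30.  0.]]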
def inference(self, inputs, nb_classes, bias_mat, hid_units, n_heads,
              activation=tf.nn.elu, residual=False, k=0.5):
    # Number of nodes to keep in the top-k selection
    select_num = tf.cast(inputs.shape[1].value * k, dtype=tf.int32)
    # mean_sum = tf.reduce_sum(tf.square(inputs), -1)
    p = tf.Variable(
        tf.truncated_normal([int(inputs.shape[-1]), 1], stddev=0.1))
    mean_sum = tf.reshape(
        tf.matmul(inputs, p) / tf.reduce_sum(tf.square(p)),
        [-1, int(inputs.shape[1])])
    a_top, a_top_idx = tf.nn.top_k(mean_sum, select_num)
    a_top_1, a_top_idx_1 = tf.nn.top_k(mean_sum, inputs.shape[1])
    a_shape = tf.shape(mean_sum)
    a_top_sm = a_top * 0 + 1
    a_row_idx = tf.tile(
        tf.range(a_shape[0])[:, tf.newaxis], (1, select_num))
    """
    a_row_idx = [array([[ 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0],
                        [ 1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1],
                        ...
                        [15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15]],
                       dtype=int32)]
    """
    scatter_idx = tf.stack([a_row_idx, a_top_idx], axis=-1)
    result = tf.scatter_nd(scatter_idx, a_top_sm, a_shape)
    a_index = tf.tile(tf.expand_dims(result, -1), (1, 1, inputs.shape[-1]))
    c_index = a_index
    inputs = a_index * inputs

    attns = []
    for _ in range(n_heads[0]):
        attns.append(
            attn_head(inputs,
                      bias_mat=bias_mat,
                      out_sz=hid_units[0],
                      activation=activation,
                      residual=False))
    h_1 = tf.concat(attns, axis=-1)
    for i in range(1, len(hid_units)):
        attns = []
        for _ in range(n_heads[i]):
            attns.append(
                attn_head(h_1,
                          bias_mat=bias_mat,
                          out_sz=hid_units[i],
                          activation=activation,
                          residual=residual))
        h_1 = tf.concat(attns, axis=-1)
        a_index = tf.tile(tf.expand_dims(result, -1), (1, 1, h_1.shape[-1]))
        h_1 = a_index * h_1
    logits = tf.layers.dense(inputs=h_1, units=nb_classes,
                             activation=tf.nn.leaky_relu)
    a_index = tf.tile(tf.expand_dims(result, -1), (1, 1, logits.shape[-1]))
    logits = a_index * logits
    return a_index, h_1, logits, inputs, select_num, a_top_idx_1
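# Sketch of the top-k masking trick used in inference above (an illustration, not
# the original code; assumes TensorFlow 1.x): build a 0/1 mask that keeps each
# row's k largest scores by scattering ones at the top-k coordinates.
import tensorflow as tf

scores = tf.constant([[0.1, 0.9, 0.5], [0.7, 0.2, 0.4]])  # (N=2, L=3)
k = 2
_, top_idx = tf.nn.top_k(scores, k)                       # (N, k) column indices
row_idx = tf.tile(tf.range(tf.shape(scores)[0])[:, tf.newaxis], (1, k))
scatter_idx = tf.stack([row_idx, top_idx], axis=-1)       # (N, k, 2)
mask = tf.scatter_nd(scatter_idx, tf.ones_like(top_idx, dtype=tf.float32),
                     tf.shape(scores))
with tf.Session() as sess:
    print(sess.run(mask))  # [[0. 1. 1.] [1. 0. 1.]]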