def score(self, graph, histories, states, observations, beliefs, lookaheads):
  del beliefs, lookaheads
  _, local_histories = histories
  _, local_states = states
  batch_shape = tf.shape(local_states)[:-2]
  # (..., B, N, dH + dz): concatenate per-node histories and states.
  full_states = tf.concat([local_histories, local_states], axis=-1)
  # (..., N, d) * (d, d) * (..., [N, d].T) -> (..., N, N)
  pairwise_bilinear_scores = tf.linalg.matmul(
      tf.linalg.tensordot(full_states, self._linear, axes=1),
      full_states, transpose_b=True)
  scaled_pairwise_bilinear_scores = tf.math.divide(
      pairwise_bilinear_scores,
      tf.math.sqrt(util.float(self._dim_concat)))
  # Flatten the (N, N) score matrix into N^2 per-edge logits.
  logits = tf.reshape(
      scaled_pairwise_bilinear_scores,
      shape=tf.stack(
          [*tf.unstack(batch_shape), tf.math.square(graph.num_nodes)]))
  labels = tf.reshape(tf.sparse.to_dense(graph.adjacency), shape=[-1])
  broadcast_labels = tf.math.add(util.float(labels), tf.zeros_like(logits))
  # Per-edge sigmoid cross-entropy against the ground-truth adjacency.
  cross_entropy = tf.nn.sigmoid_cross_entropy_with_logits(
      labels=broadcast_labels, logits=logits)
  return tf.math.negative(tf.math.reduce_sum(cross_entropy, axis=-1))
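# Standalone toy check of the bilinear edge scorer above (a sketch with
# made-up shapes; `linear` and `dim_concat` stand in for `self._linear` and
# `self._dim_concat`, and a random dense matrix replaces `graph.adjacency`):
import tensorflow as tf

num_nodes, dim_concat = 4, 8
full_states = tf.random.normal([num_nodes, dim_concat])
linear = tf.random.normal([dim_concat, dim_concat])
scores = tf.linalg.matmul(tf.linalg.matmul(full_states, linear),
                          full_states, transpose_b=True)
scores = scores / tf.math.sqrt(float(dim_concat))       # (N, N) logits
adjacency = tf.cast(tf.random.uniform([num_nodes, num_nodes]) < 0.3,
                    tf.float32)
cross_entropy = tf.nn.sigmoid_cross_entropy_with_logits(
    labels=adjacency, logits=scores)                    # per-edge loss
score = -tf.math.reduce_sum(cross_entropy)              # higher is better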
def _compute_nce_score(self, predictions, lookaheads):
  assert predictions.shape.ndims == lookaheads.shape.ndims
  shape = tf.shape(predictions)
  batch_size, max_num_nodes = shape[-3], shape[-2]
  unknown_prefix = shape[:-3]
  unknown_prefix_list = tf.unstack(unknown_prefix)
  # (..., B, N, dh) -> (..., B * N, dh)
  flat_shape = tf.stack(
      [*unknown_prefix_list, batch_size * max_num_nodes, -1])
  flat_predictions = tf.reshape(predictions, flat_shape)
  flat_lookaheads = tf.reshape(lookaheads, flat_shape)
  # (..., B * N, dh) * (dh, ds) -> (..., B * N, ds)
  # (..., B * N, ds) * (..., [B * N, ds].T) -> (..., B * N, B * N)
  pairwise_log_bilinear_scores = tf.math.divide(
      tf.linalg.matmul(
          flat_predictions,
          tf.linalg.tensordot(flat_lookaheads, self._linear, axes=1),
          transpose_b=True),
      tf.math.sqrt(util.float(util.dim(predictions))))
  # (..., B * N, B * N) -> (..., B * N) -> (..., B, N)
  batched_nce_scores = tf.math.subtract(
      tf.linalg.diag_part(pairwise_log_bilinear_scores),
      tf.math.reduce_logsumexp(pairwise_log_bilinear_scores, axis=-1))
  return tf.reshape(
      batched_nce_scores,
      tf.stack([*unknown_prefix_list, batch_size, max_num_nodes]))
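# Minimal illustration of the InfoNCE-style quantity computed above: each of
# the B * N prediction/lookahead pairs is the positive on the diagonal of
# the pairwise score matrix and is log-softmax-normalized against all pairs.
# Shapes are made up; `linear` stands in for `self._linear`.
import tensorflow as tf

flat, dh = 6, 5                                    # flat = B * N
predictions = tf.random.normal([flat, dh])
lookaheads = tf.random.normal([flat, dh])
linear = tf.random.normal([dh, dh])
pairwise = tf.linalg.matmul(predictions,
                            tf.linalg.matmul(lookaheads, linear),
                            transpose_b=True) / tf.math.sqrt(float(dh))
nce = tf.linalg.diag_part(pairwise) - tf.math.reduce_logsumexp(pairwise,
                                                               axis=-1)
# Every entry of `nce` is <= 0; exp(nce) is the softmax probability of the
# matching lookahead among all B * N candidates.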
def score(self, graph, histories, states, observations, beliefs, lookaheads):
  '''
  Args:
    histories: A 2-ary tuple:
      - global_histories: A (..., B, dH) Tensor.
      - local_histories: A (..., B, N, dH) Tensor.
    states: A 2-ary tuple:
      - global_states: A (..., B, dz) Tensor.
      - local_states: A (..., B, N, dz) Tensor.
    lookaheads: A (B, N, ds) Tensor of summaries (asserted rank-3 below).
  '''
  del graph, beliefs
  global_histories, local_histories = histories
  global_states, local_states = states
  shape = tf.shape(lookaheads)
  with tf.control_dependencies([
      tf.assert_equal(tf.size(shape), 3),
      tf.assert_equal(shape[:-1], tf.shape(local_states)[-3:-1])
  ]):
    summaries = lookaheads
  batch_size, max_num_nodes = shape[0], shape[1]
  unknown_prefix = tf.shape(local_states)[:-3]
  unknown_prefix_list = tf.unstack(unknown_prefix)
  local_context = (
      _broadcast_and_concat(global_states, local_states)
      if self._state == "z"
      else _broadcast_and_concat(global_histories, local_histories))
  # (..., B, N, dz) -> (..., B * N, dz) * (dz, ds) -> (..., B * N, ds)
  flat_local_context = tf.reshape(
      local_context,
      tf.stack([*unknown_prefix_list, batch_size * max_num_nodes, -1]))
  transformed_local_context = tf.linalg.tensordot(
      flat_local_context, self._linear, axes=1)
  # (B, N, ds) -> (B * N, ds) -> (..., B * N, ds)
  broadcast_summaries = tf.math.add(
      tf.zeros_like(transformed_local_context),
      tf.reshape(summaries, tf.stack([batch_size * max_num_nodes, -1])))
  # (..., B * N, ds) * (..., [B * N, ds].T) -> (..., B * N, B * N)
  pairwise_log_bilinear_scores = tf.math.divide(
      tf.linalg.matmul(transformed_local_context, broadcast_summaries,
                       transpose_b=True),
      tf.math.sqrt(util.float(self._dim_summary)))
  # (..., B * N, B * N) -> (..., B * N) -> (..., B, N)
  batched_nce_scores = tf.math.subtract(
      tf.linalg.diag_part(pairwise_log_bilinear_scores),
      tf.math.reduce_logsumexp(pairwise_log_bilinear_scores, axis=-1))
  return tf.reshape(
      batched_nce_scores,
      tf.stack([*unknown_prefix_list, batch_size, max_num_nodes]))
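# `_broadcast_and_concat` is not shown in this excerpt; a plausible sketch,
# assuming it tiles the per-graph global vector across the node axis and
# concatenates it onto each node's local vector:
import tensorflow as tf

def _broadcast_and_concat_sketch(global_states, local_states):
  # global_states: (..., B, dg); local_states: (..., B, N, dl).
  expanded = tf.expand_dims(global_states, axis=-2)         # (..., B, 1, dg)
  broadcast = expanded + tf.zeros_like(local_states[..., :1])  # (..., B, N, dg)
  return tf.concat([broadcast, local_states], axis=-1)      # (..., B, N, dg + dl)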
def _score_recv_only(self, graph, local_histories, lookaheads):
  prefix_shape = tf.shape(local_histories)[-4:-1]  # (S, B, N)
  num_samples, batch_size, num_nodes = tf.unstack(prefix_shape)
  node_ids = tf.random.uniform(
      shape=[num_samples, batch_size, self._num_masked_nodes],
      dtype=tf.int32, minval=0, maxval=num_nodes)
  # (S, B, N, N) and (S, B, N)
  adj_mask, node_mask = graph.gen_dense_recv_only_mask(node_ids)
  # (S, B, E) when the GNN operates on sparse edges.
  mask = adj_mask if not self._gnn_is_sparse else \
      graph.gen_sparse_recv_only_mask(node_ids)
  # Zero out the histories of the masked (receive-only) nodes.
  masked_local_histories = tf.math.multiply(
      local_histories,
      tf.math.subtract(
          1.0, tf.expand_dims(util.float(node_mask), axis=-1)))
  hidden = self._gnn(graph=graph, states=masked_local_histories,
                     reverse_mask=mask)
  scores = self._compute_nce_score(self._mlp(hidden), lookaheads)
  # Only the masked nodes contribute to the score.
  return tf.math.multiply(scores, util.float(node_mask))
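# Toy illustration of the masking arithmetic above, without the GNN or the
# graph API (shapes and the mask rate are made up): masked nodes have their
# histories zeroed, and only they contribute to the final score.
import tensorflow as tf

S, B, N, dh = 2, 3, 5, 4
local_histories = tf.random.normal([S, B, N, dh])
node_mask = tf.cast(tf.random.uniform([S, B, N]) < 0.4, tf.float32)
masked_histories = local_histories * (
    1.0 - tf.expand_dims(node_mask, axis=-1))   # zero out masked nodes
# ... a GNN would now reconstruct the masked nodes from their neighbors;
# here a random tensor stands in for the resulting NCE scores:
scores = tf.random.normal([S, B, N])
masked_scores = scores * node_mask              # keep masked nodes only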
def score(self, graph, histories, states, observations, beliefs, lookaheads):
  '''
  Args:
    histories: A 2-ary tuple:
      - global_histories: A (..., B, dH) Tensor.
      - local_histories: A (..., B, N, dH) Tensor.
    states: A 2-ary tuple:
      - global_states: A (..., B, dz) Tensor.
      - local_states: A (..., B, N, dz) Tensor.
  '''
  del beliefs, lookaheads
  _, local_histories = histories
  global_states, _ = states
  # (..., B, N, dH) * (dH, ds) -> (..., B, N, ds)
  transformed_local_embeddings = tf.linalg.tensordot(
      local_histories, self._linear, axes=1)
  # (..., B, N, ds) -> (..., B, ds) * (ds, ds) -> (..., B, ds)
  transformed_graph_readouts = tf.linalg.tensordot(
      global_states, self._bilinear, axes=1)
  # (..., B, ds) -> (..., B, B, ds)
  shape_list = tf.unstack(tf.shape(transformed_graph_readouts))
  broadcast_graph_readouts = tf.math.add(
      tf.zeros(tf.stack([*shape_list[:-1], *shape_list[-2:]])),
      tf.expand_dims(transformed_graph_readouts, axis=-3))
  # (..., B, N, ds) * (..., B, [B, ds].T) -> (..., B, N, B)
  global_local_bilinear_scores = tf.linalg.matmul(
      transformed_local_embeddings, broadcast_graph_readouts,
      transpose_b=True)
  scaled_scores = tf.math.divide(
      global_local_bilinear_scores,
      tf.math.sqrt(util.float(self._dim_summary)))
  # (..., B1, N, B2) -> (..., N, B1, B2)
  perm = tf.range(scaled_scores.shape.ndims)
  # (..., -3, -2, -1) -> (..., -2, -3, -1)
  perm = tf.stack([*tf.unstack(perm[:-3]), perm[-2], perm[-3], perm[-1]])
  transposed_scores = tf.transpose(scaled_scores, perm)
  # (..., N, B, B) -> (..., N, B) -> (..., B, N)
  batched_nce_scores = tf.math.subtract(
      tf.linalg.diag_part(transposed_scores),
      tf.math.reduce_logsumexp(transposed_scores, axis=-1))
  return tf.linalg.transpose(batched_nce_scores)
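# Toy version of the global-vs-local contrast above, written with einsum for
# readability (made-up shapes; the (ds, ds) bilinear map is folded into the
# readouts): for each node slot, the matching graph's readout (the diagonal
# over the batch axis) is the positive among all B readouts.
import tensorflow as tf

B, N, ds = 3, 4, 6
local_embeddings = tf.random.normal([B, N, ds])
graph_readouts = tf.random.normal([B, ds])
scores = tf.einsum('bns,cs->nbc', local_embeddings,
                   graph_readouts) / tf.math.sqrt(float(ds))  # (N, B1, B2)
nce = tf.linalg.diag_part(scores) - tf.math.reduce_logsumexp(scores, axis=-1)
nce = tf.transpose(nce)                                       # (N, B) -> (B, N)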
def sched(self, context, batch_size):
  t, length = context.t, context.length
  if self.randomly:
    # Use the proposal i.i.d. with probability `self.rate`.
    use_proposal = tf.math.less(
        tf.random.uniform(shape=[batch_size], dtype=tf.float32),
        self.rate)
  else:
    # Use the proposal every `self.period` steps.
    use_proposal = tf.math.equal(
        tf.floormod(t, self.period),
        tf.zeros([batch_size], dtype=tf.int32))
  # Optionally refresh at the last step, and always during the prefix.
  use_proposal = tf.math.logical_or(
      use_proposal,
      tf.math.logical_and(tf.math.equal(t, length - 1),
                          self.refresh_last_step))
  use_proposal = tf.math.logical_or(use_proposal,
                                    tf.math.less(t, self.prefix_length))
  context.use_proposal = use_proposal
  return util.float(use_proposal)
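# Usage sketch of the periodic branch above (scalars are made up; `context`
# and `self` are replaced by plain values): the proposal fires whenever
# t % period == 0, at every step of the warm-up prefix, and, if enabled, at
# the final step.
import tensorflow as tf

t, length = tf.constant(6), tf.constant(10)
period, prefix_length, batch_size = 3, 2, 4
refresh_last_step = True
use_proposal = tf.math.equal(tf.math.floormod(t, period),
                             tf.zeros([batch_size], dtype=tf.int32))
use_proposal = tf.math.logical_or(
    use_proposal,
    tf.math.logical_and(tf.math.equal(t, length - 1), refresh_last_step))
use_proposal = tf.math.logical_or(use_proposal,
                                  tf.math.less(t, prefix_length))
# t == 6 and 6 % 3 == 0, so all four entries are True here.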
def __init__(self, edges, center_mask, node_mask, edge_mask, dense=False,
             node_attrs=None, edge_attrs=None, reversed=None):
  batch_size = tf.shape(edges)[0]
  num_nodes = tf.math.reduce_sum(node_mask, axis=-1)  # (B, N) -> (B)
  max_num_nodes = num_nodes[tf.math.argmax(num_nodes)]
  num_edges = tf.math.reduce_sum(edge_mask, axis=-1)  # (B, E) -> (B)
  max_num_edges = num_edges[tf.math.argmax(num_edges)]
  with tf.control_dependencies([
      tf.assert_equal(tf.shape(node_mask), tf.shape(center_mask)),
      tf.assert_equal(tf.shape(edges)[:-1], tf.shape(edge_mask)),
      tf.assert_equal(tf.shape(node_mask)[-1], max_num_nodes),
      tf.assert_equal(tf.shape(edge_mask)[-1], max_num_edges),
      tf.assert_equal(tf.size(num_nodes), batch_size)
  ]):
    sparse_edges, batch_edge_indices = self._gen_sparse_edges(
        batched_edges=edges, batched_edge_mask=edge_mask)
    adjs, edges = self._gen_sparse_adj_matrix(
        sparse_edges=sparse_edges, batched_edges=edges,
        batched_edge_mask=edge_mask, max_num_nodes=max_num_nodes)
  # (B, N, N) -> (B, N)
  indegree = tf.sparse.reduce_sum(adjs, axis=-2)
  outdegree = tf.sparse.reduce_sum(adjs, axis=-1)
  # (B, N) -- batch_gather([B, E]) --> (B, E)
  tail_indegree = tf.batch_gather(indegree, indices=edges[..., 1])
  head_outdegree = tf.batch_gather(outdegree, indices=edges[..., 0])
  if node_attrs is not None and node_attrs.shape.ndims == 2:
    with tf.control_dependencies([
        tf.assert_equal(tf.shape(node_attrs)[0], max_num_nodes)
    ]):
      node_attrs = tf.tile(
          tf.expand_dims(node_attrs, axis=0),
          tf.stack([batch_size, 1, 1]))
  self._batch_size = batch_size
  self._adjs = adjs
  self._edges = tf.cast(edges, tf.int32)
  self._edges_int64 = tf.cast(edges, tf.int64)
  self._senders = self._edges[..., 0]
  self._receivers = self._edges[..., 1]
  self._sparse_edges = tf.cast(sparse_edges, tf.int64)
  self._batch_edge_indices = tf.cast(batch_edge_indices, tf.int32)
  self._node_attrs = node_attrs
  self._edge_attrs = edge_attrs
  self._num_nodes = num_nodes
  self._num_edges = num_edges
  self._max_num_nodes = max_num_nodes
  self._max_num_edges = max_num_edges
  self._total_num_nodes = tf.math.reduce_sum(num_nodes, axis=-1)
  self._total_num_edges = tf.math.reduce_sum(num_edges, axis=-1)
  self._indegree = indegree
  self._outdegree = outdegree
  self._tail_indegree = util.float(tail_indegree)
  self._head_outdegree = util.float(head_outdegree)
  self._center_mask = util.float(center_mask)
  self._node_mask = util.float(node_mask)
  self._edge_mask = util.float(edge_mask)
  self._reversed = reversed
  self._dense_adjs = self._dense_edge_attrs = None
  if dense:
    self._dense_adjs = tf.sparse.to_dense(adjs)
    self._dense_edge_attrs = self._edge_attrs_to_dense(
        edges, edge_attrs, max_num_nodes)
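# Hypothetical construction sketch for the batched-graph container above
# (the owning class name is not shown in this excerpt, so the call is left
# commented): per-graph edge lists are padded to a common E and masked, and
# node masks are padded to a common N.
import tensorflow as tf

edges = tf.constant([[[0, 1], [1, 2], [0, 0]]])  # (B=1, E=3, 2); last row is padding
edge_mask = tf.constant([[1, 1, 0]])             # (B, E): third edge masked out
node_mask = tf.constant([[1, 1, 1]])             # (B, N=3): all nodes present
center_mask = tf.zeros_like(node_mask)           # no designated center nodes
# graph = BatchedGraph(edges, center_mask, node_mask, edge_mask, dense=True)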
def sample(prior, variational_prior, make_likelihood, observations):
  '''
  Args:
    prior: A distribution with event shape (dz) and compatible batch shape
      w.r.t. variational_prior.
    variational_prior: A distribution with batch shape (..., N) and event
      shape (dz).
    make_likelihood: A function that returns a distribution with batch
      shape (..., N) and event shape (dx).
    observations: A (B, N, dx) Tensor.

  Returns:
    samples: A (..., N, dz) Tensor.
    log_weights: A Tensor of unnormalized log importance weights,
      log p - log q.
  '''
  assert observations.shape.ndims == 3
  num_nodes = tf.shape(observations)[1]
  flat_num_dims = global_num_dims + num_nodes * local_num_dims
  step_size = tf.concat(
      [global_step_size, tf.tile(local_step_size, [num_nodes])], axis=0)
  half_step_size = tf.math.divide(step_size, 2.0)
  momentum_scale_diag = tf.math.multiply(momentum_scale_factor,
                                         tf.ones([flat_num_dims]))
  momentum_inv_variance = tf.math.divide(
      1.0, tf.square(momentum_scale_diag))

  def cond(t, *unused_args):
    return tf.less(t, num_steps + 1)

  def body(t, position, momentum, inv_temp, *unused_args):
    # One leapfrog step: half momentum kick, full position drift, second
    # half kick, then momentum tempering.
    likelihood = make_likelihood(position)
    neg_log_prob = -tf.math.add(prior.log_prob(position),
                                likelihood.log_prob(observations))
    with tf.control_dependencies([
        tf.assert_equal(tf.shape(neg_log_prob), tf.shape(position)[:-1])
    ]):
      gradients = tf.gradients(neg_log_prob, position)
      gradient = gradients[0]
    with tf.control_dependencies(
        [tf.assert_equal(tf.shape(gradient), tf.shape(position))]):
      new_momentum_tmp = tf.math.subtract(
          momentum, tf.math.multiply(half_step_size, gradient))
    new_momentum_tmp_rescaled = tf.math.multiply(
        momentum_inv_variance, new_momentum_tmp)
    new_position = tf.math.add(
        position, tf.math.multiply(step_size, new_momentum_tmp_rescaled))
    new_likelihood = make_likelihood(new_position)
    new_neg_log_prob = -tf.math.add(
        prior.log_prob(new_position),
        new_likelihood.log_prob(observations))
    new_gradient = tf.gradients(new_neg_log_prob, new_position)[0]
    new_momentum = tf.math.subtract(
        new_momentum_tmp, tf.math.multiply(half_step_size, new_gradient))
    new_inv_temp = sched_inv_temp(init_inv_temp, t, num_steps)
    tempering_factor = tf.math.sqrt(
        tf.math.divide(inv_temp, new_inv_temp))
    new_tempered_momentum = tf.math.multiply(new_momentum,
                                             tempering_factor)
    return t + 1, new_position, new_tempered_momentum, new_inv_temp

  initial_position, initial_log_v_prior_prob = \
      variational_prior.sample()
  momentum_loc = tf.zeros_like(initial_position)
  initial_tempered_scale_diag = tf.math.divide(
      momentum_scale_diag, tf.math.sqrt(init_inv_temp))
  initial_momentum_dist = tfd.MultivariateNormalDiag(
      loc=momentum_loc, scale_diag=initial_tempered_scale_diag)
  initial_momentum = initial_momentum_dist.sample(1)[0]
  with tf.control_dependencies([
      tf.assert_equal(tf.shape(initial_momentum),
                      tf.shape(initial_position))
  ]):
    initial_momentum = tf.identity(initial_momentum)
  t1 = tf.constant(1)
  _, position, momentum, inv_temp = tf.while_loop(
      cond, body, [t1, initial_position, initial_momentum, init_inv_temp])
  #
  # Static loop:
  #
  # t, position, momentum, inv_temp = \
  #     t1, initial_position, initial_momentum, init_inv_temp
  # for _ in range(num_steps):
  #   t, position, momentum, inv_temp = body(
  #       t, position, momentum, inv_temp
  #   )
  #
  final_position, final_momentum, final_inv_temp = \
      position, momentum, inv_temp
  with tf.control_dependencies([tf.assert_equal(final_inv_temp, 1.0)]):
    final_position = tf.identity(final_position)
  final_momentum_dist = tfd.MultivariateNormalDiag(
      loc=momentum_loc, scale_diag=momentum_scale_diag)
  final_likelihood = make_likelihood(final_position)
  # Change-of-variables term from tempering the initial momentum scale.
  log_jacobian = tf.math.multiply(
      tf.math.divide(util.float(flat_num_dims), 2.0),
      tf.math.log(init_inv_temp))
  log_p = tf.math.add(
      tf.math.add(prior.log_prob(final_position),
                  final_likelihood.log_prob(observations)),
      final_momentum_dist.log_prob(final_momentum))
  log_q = tf.math.subtract(
      tf.math.add(initial_log_v_prior_prob,
                  initial_momentum_dist.log_prob(initial_momentum)),
      log_jacobian)
  return final_position, tf.math.subtract(log_p, log_q)
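# Self-contained single leapfrog step in the spirit of `body` above, on a
# standard-normal target whose gradient is analytic (d/dx 0.5 * ||x||^2 = x),
# so no autodiff is needed; unit mass and a made-up step size:
import tensorflow as tf

step_size = 0.1
position = tf.constant([0.5, -1.0])
momentum = tf.constant([0.2, 0.3])

momentum = momentum - 0.5 * step_size * position  # half momentum kick
position = position + step_size * momentum        # full position drift
momentum = momentum - 0.5 * step_size * position  # second half kick
# With tempering, the momentum would additionally be rescaled by
# sqrt(inv_temp / new_inv_temp) after each step, and log_p - log_q above
# then serves as an unnormalized log importance weight.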