Example #1
    def score(self, graph, histories, states, observations, beliefs,
              lookaheads):
        del beliefs, lookaheads
        _, local_histories = histories
        _, local_states = states
        batch_shape = tf.shape(local_states)[:-2]

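        # Concatenate each node's history and state, then score every ordered
        # node pair with a scaled bilinear form.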
        full_states = tf.concat([local_histories, local_states], axis=-1)
        transformed_states = tf.linalg.tensordot(full_states, self._linear,
                                                 axes=1)
        pairwise_bilinear_scores = tf.linalg.matmul(transformed_states,
                                                    full_states,
                                                    transpose_b=True)
        scaled_pairwise_bilinear_scores = tf.math.divide(
            pairwise_bilinear_scores,
            tf.math.sqrt(util.float(self._dim_concat)))

        logits = tf.reshape(scaled_pairwise_bilinear_scores,
                            shape=tf.stack([
                                *tf.unstack(batch_shape),
                                tf.math.square(graph.num_nodes)
                            ]))
        labels = tf.reshape(tf.sparse.to_dense(graph.adjacency), shape=[-1])
        broadcast_labels = tf.math.add(util.float(labels),
                                       tf.zeros_like(logits))

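        # The score is the log-likelihood of the dense adjacency under
        # independent Bernoullis, i.e. the negative sigmoid cross-entropy
        # summed over all node pairs.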
        cross_entropy = tf.nn.sigmoid_cross_entropy_with_logits(
            labels=broadcast_labels, logits=logits)
        return tf.math.negative(tf.math.reduce_sum(cross_entropy, axis=-1))
Example #2
    def _compute_nce_score(self, predictions, lookaheads):
        assert predictions.shape.ndims == lookaheads.shape.ndims
        shape = tf.shape(predictions)
        batch_size, max_num_nodes = shape[-3], shape[-2]
        unknown_prefix = shape[:-3]
        unknown_prefix_list = tf.unstack(unknown_prefix)
        # (..., B, N, dh) -> (..., B * N, dh)
        flat_shape = tf.stack(
            [*unknown_prefix_list, batch_size * max_num_nodes, -1])
        flat_predictions = tf.reshape(predictions, flat_shape)
        flat_lookaheads = tf.reshape(lookaheads, flat_shape)

        # Project lookaheads into the prediction space, then take all
        # pairwise inner products:
        # (..., B * N, ds) * (..., [B * N, ds].T) -> (..., B * N, B * N)
        pairwise_log_bilinear_scores = tf.math.divide(
            tf.linalg.matmul(flat_predictions,
                             tf.linalg.tensordot(flat_lookaheads,
                                                 self._linear,
                                                 axes=1),
                             transpose_b=True),
            tf.math.sqrt(util.float(util.dim(predictions))))
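        # NCE: each positive (diagonal) score is normalized against all
        # scores in its row via logsumexp, i.e. a log-softmax.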
        # (..., B * N, B * N) -> (..., B * N) -> (..., B, N)
        batched_nce_scores = tf.math.subtract(
            tf.linalg.diag_part(pairwise_log_bilinear_scores),
            tf.math.reduce_logsumexp(pairwise_log_bilinear_scores, axis=-1))
        return tf.reshape(
            batched_nce_scores,
            tf.stack([*unknown_prefix_list, batch_size, max_num_nodes]))
Example #3
    def score(self, graph, histories, states, observations, beliefs,
              lookaheads):
        '''
        Args:
          histories: A 2-ary tuple:
            - global_histories: A (..., B, dH) Tensor.
            - local_histories: A (..., B, N, dH) Tensor.
          states: A 2-ary tuple:
            - global_states: A (..., B, dz) Tensor.
            - local_states: A (..., B, N, dz) Tensor.
        '''
        del graph, beliefs
        global_histories, local_histories = histories
        global_states, local_states = states

        shape = tf.shape(lookaheads)
        with tf.control_dependencies([
                tf.assert_equal(tf.size(shape), 3),
                tf.assert_equal(shape[:-1],
                                tf.shape(local_states)[-3:-1])
        ]):
            # tf.identity creates an op inside the control_dependencies
            # scope, so the shape assertions above actually execute.
            summaries = tf.identity(lookaheads)
            batch_size, max_num_nodes = shape[0], shape[1]
            unknown_prefix = tf.shape(local_states)[:-3]
            unknown_prefix_list = tf.unstack(unknown_prefix)

        local_context = (
            _broadcast_and_concat(global_states, local_states)
            if self._state == "z" else
            _broadcast_and_concat(global_histories, local_histories))

        # (..., B, N, dz) ->
        # (..., B * N, dz) * (dz, ds) -> (..., B * N, ds)
        flat_local_context = tf.reshape(
            local_context,
            tf.stack([*unknown_prefix_list, batch_size * max_num_nodes, -1]))
        transformed_local_context = tf.linalg.tensordot(flat_local_context,
                                                        self._linear,
                                                        axes=1)

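        # Adding zeros_like broadcasts the flat summaries across the unknown
        # prefix (e.g. sample) dimensions.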
        # (B, N, ds) -> (B * N, ds) -> (..., B * N, ds)
        broadcast_summaries = tf.math.add(
            tf.zeros_like(transformed_local_context),
            tf.reshape(summaries, tf.stack([batch_size * max_num_nodes, -1])))

        # (..., B * N, ds) * (..., [B * N, ds].T) -> (..., B * N, B * N)
        pairwise_log_bilinear_scores = tf.math.divide(
            tf.linalg.matmul(transformed_local_context,
                             broadcast_summaries,
                             transpose_b=True),
            tf.math.sqrt(util.float(self._dim_summary)))
        # (..., B * N, B * N) -> (..., B * N) -> (..., B, N)
        batched_nce_scores = tf.math.subtract(
            tf.linalg.diag_part(pairwise_log_bilinear_scores),
            tf.math.reduce_logsumexp(pairwise_log_bilinear_scores, axis=-1))
        return tf.reshape(
            batched_nce_scores,
            tf.stack([*unknown_prefix_list, batch_size, max_num_nodes]))
Example #4
    def _score_recv_only(self, graph, local_histories, lookaheads):
        prefix_shape = tf.shape(local_histories)[-4:-1]  # (S, B, N)
        num_samples, batch_size, num_nodes = tf.unstack(prefix_shape)

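        # Zero out the histories of randomly sampled nodes, run the GNN so
        # those nodes only receive messages, and score just the masked nodes.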
        node_ids = tf.random.uniform(
            shape=[num_samples, batch_size, self._num_masked_nodes],
            dtype=tf.int32,
            minval=0,
            maxval=num_nodes)
        # (S, B, N, N) and (S, B, N)
        adj_mask, node_mask = graph.gen_dense_recv_only_mask(node_ids)
        if self._gnn_is_sparse:
            mask = graph.gen_sparse_recv_only_mask(node_ids)  # (S, B, E)
        else:
            mask = adj_mask

        masked_local_histories = tf.math.multiply(
            local_histories,
            tf.math.subtract(1.0, tf.expand_dims(util.float(node_mask),
                                                 axis=-1)))
        hidden = self._gnn(graph=graph,
                           states=masked_local_histories,
                           reverse_mask=mask)
        scores = self._compute_nce_score(self._mlp(hidden), lookaheads)
        return tf.math.multiply(scores, util.float(node_mask))
Example #5
    def score(self, graph, histories, states, observations, beliefs,
              lookaheads):
        '''
        Args:
          histories: A 2-ary tuple:
            - global_histories: A (..., B, dH) Tensor.
            - local_histories: A (..., B, N, dH) Tensor.
          states: A 2-ary tuple:
            - global_states: A (..., B, dz) Tensor.
            - local_states: A (..., B, N, dz) Tensor.
        '''
        del beliefs, lookaheads
        _, local_histories = histories
        global_states, _ = states

        # (..., B, N, dH) * (dH, ds) -> (..., B, N, ds)
        transformed_local_embeddings = tf.linalg.tensordot(local_histories,
                                                           self._linear,
                                                           axes=1)
        # (..., B, dz) * (dz, ds) -> (..., B, ds)
        transformed_graph_readouts = tf.linalg.tensordot(global_states,
                                                         self._bilinear,
                                                         axes=1)

        # (..., B, ds) -> (..., B, B, ds)
        shape_list = tf.unstack(tf.shape(transformed_graph_readouts))
        broadcast_graph_readouts = tf.math.add(
            tf.zeros(tf.stack([*shape_list[:-1], *shape_list[-2:]])),
            tf.expand_dims(transformed_graph_readouts, axis=-3))

        # (..., B, N, ds) * (..., B, [B, ds].T) -> (..., B, N, B)
        global_local_bilinear_scores = tf.linalg.matmul(
            transformed_local_embeddings,
            broadcast_graph_readouts,
            transpose_b=True)
        scaled_scores = tf.math.divide(
            global_local_bilinear_scores,
            tf.math.sqrt(util.float(self._dim_summary)))

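        # Move the node axis ahead of the batch axes so that diag_part pairs
        # each graph's local embeddings with its own global readout.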
        # (..., B1, N, B2) -> (..., N, B1, B2)
        perm = tf.range(scaled_scores.shape.ndims)
        # (..., -3, -2, -1) -> (..., -2, -3, -1)
        perm = tf.stack([*tf.unstack(perm[:-3]), perm[-2], perm[-3], perm[-1]])
        transposed_scores = tf.transpose(scaled_scores, perm)

        # (..., N, B, B) -> (..., N, B) -> (..., B, N)
        batched_nce_scores = tf.math.subtract(
            tf.linalg.diag_part(transposed_scores),
            tf.math.reduce_logsumexp(transposed_scores, axis=-1))
        return tf.linalg.transpose(batched_nce_scores)
Example #6
    def sched(self, context, batch_size):
        t, length = context.t, context.length

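        # Decide per batch element whether to use the proposal: at random with
        # probability `self.rate`, or deterministically every `self.period`
        # steps.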
        if self.randomly:
            use_proposal = tf.math.less(
                tf.random.uniform(shape=[batch_size], dtype=tf.float32),
                self.rate)
        else:
            use_proposal = tf.math.equal(
                tf.floormod(t, self.period),
                tf.zeros([batch_size], dtype=tf.int32))

        use_proposal = tf.math.logical_or(
            use_proposal,
            tf.math.logical_and(tf.math.equal(t, length - 1),
                                self.refresh_last_step))
        use_proposal = tf.math.logical_or(use_proposal,
                                          tf.math.less(t, self.prefix_length))
        context.use_proposal = use_proposal
        return util.float(use_proposal)
Example #7
    def __init__(self, edges, center_mask, node_mask, edge_mask,
                 dense=False, node_attrs=None, edge_attrs=None,
                 reversed=None):
        batch_size = tf.shape(edges)[0]
        num_nodes = tf.math.reduce_sum(node_mask, axis=-1)  # (B, N) -> (B)
        max_num_nodes = tf.math.reduce_max(num_nodes)
        num_edges = tf.math.reduce_sum(edge_mask, axis=-1)  # (B, E) -> (B)
        max_num_edges = tf.math.reduce_max(num_edges)

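        # Check mask shapes before building the sparse edge list and
        # adjacency matrices.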
        with tf.control_dependencies([
            tf.assert_equal(tf.shape(node_mask), tf.shape(center_mask)),
            tf.assert_equal(tf.shape(edges)[:-1], tf.shape(edge_mask)),
            tf.assert_equal(tf.shape(node_mask)[-1], max_num_nodes),
            tf.assert_equal(tf.shape(edge_mask)[-1], max_num_edges),
            tf.assert_equal(tf.size(num_nodes), batch_size)
        ]):
            sparse_edges, batch_edge_indices = self._gen_sparse_edges(
                batched_edges=edges, batched_edge_mask=edge_mask
            )
            adjs, edges = self._gen_sparse_adj_matrix(
                sparse_edges=sparse_edges, batched_edges=edges,
                batched_edge_mask=edge_mask, max_num_nodes=max_num_nodes
            )

        # (B, N, N) -> (B, N)
        indegree = tf.sparse.reduce_sum(adjs, axis=-2)
        outdegree = tf.sparse.reduce_sum(adjs, axis=-1)
        # (B, N) -- batch_gather([B, E]) --> (B, E)
        tail_indegree = tf.batch_gather(indegree, indices=edges[..., 1])
        head_outdegree = tf.batch_gather(outdegree, indices=edges[..., 0])

        if node_attrs is not None and node_attrs.shape.ndims == 2:
            with tf.control_dependencies([
                tf.assert_equal(tf.shape(node_attrs)[0], max_num_nodes)
            ]):
                node_attrs = tf.tile(
                    tf.expand_dims(node_attrs, axis=0),
                    tf.stack([batch_size, 1, 1])
                )

        self._batch_size = batch_size
        self._adjs = adjs
        self._edges = tf.cast(edges, tf.int32)
        self._edges_int64 = tf.cast(edges, tf.int64)
        self._senders = self._edges[..., 0]
        self._receivers = self._edges[..., 1]
        self._sparse_edges = tf.cast(sparse_edges, tf.int64)
        self._batch_edge_indices = tf.cast(batch_edge_indices, tf.int32)
        self._node_attrs = node_attrs
        self._edge_attrs = edge_attrs
        self._num_nodes = num_nodes
        self._num_edges = num_edges
        self._max_num_nodes = max_num_nodes
        self._max_num_edges = max_num_edges
        self._total_num_nodes = tf.math.reduce_sum(num_nodes, axis=-1)
        self._total_num_edges = tf.math.reduce_sum(num_edges, axis=-1)
        self._indegree = indegree
        self._outdegree = outdegree
        self._tail_indegree = util.float(tail_indegree)
        self._head_outdegree = util.float(head_outdegree)
        self._center_mask = util.float(center_mask)
        self._node_mask = util.float(node_mask)
        self._edge_mask = util.float(edge_mask)
        self._reversed = reversed

        self._dense_adjs = self._dense_edge_attrs = None
        if dense:
            self._dense_adjs = tf.sparse.to_dense(adjs)
            self._dense_edge_attrs = self._edge_attrs_to_dense(
                edges, edge_attrs, max_num_nodes
            )
Example #8
    def sample(prior, variational_prior, make_likelihood, observations):
        '''
        Args:
          prior: A distribution with event shape (dz) and a batch shape
              compatible with variational_prior.
          variational_prior: A distribution with batch shape (..., N) and
              event shape (dz).
          make_likelihood: A function that returns a distribution with
              batch shape (..., N) and event shape (dx).
          observations: A (B, N, dx) Tensor.

        Returns:
          samples: A (..., N, dz) Tensor.
          log_weights: A Tensor of log importance weights (log_p - log_q).
        '''
        assert observations.shape.ndims == 3
        num_nodes = tf.shape(observations)[1]
        flat_num_dims = global_num_dims + num_nodes * local_num_dims

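        # Per-dimension leapfrog step sizes: the global block followed by the
        # local block tiled once per node.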
        step_size = tf.concat(
            [global_step_size,
             tf.tile(local_step_size, [num_nodes])], axis=0)
        half_step_size = tf.math.divide(step_size, 2.0)

        momentum_scale_diag = tf.math.multiply(momentum_scale_factor,
                                               tf.ones([flat_num_dims]))
        momentum_inv_variance = tf.math.divide(1.0,
                                               tf.square(momentum_scale_diag))

        def cond(t, *unused_args):
            return tf.less(t, num_steps + 1)

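        # One leapfrog step of the tempered Hamiltonian dynamics: half-step
        # momentum update, full position step, half-step momentum update,
        # then momentum rescaling by sqrt(inv_temp / new_inv_temp).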
        def body(t, position, momentum, inv_temp, *unused_args):
            likelihood = make_likelihood(position)
            neg_log_prob = -tf.math.add(prior.log_prob(position),
                                        likelihood.log_prob(observations))
            with tf.control_dependencies([
                    tf.assert_equal(tf.shape(neg_log_prob),
                                    tf.shape(position)[:-1])
            ]):
                gradients = tf.gradients(neg_log_prob, position)
                gradient = gradients[0]
            with tf.control_dependencies(
                [tf.assert_equal(tf.shape(gradient), tf.shape(position))]):
                new_momentum_tmp = tf.math.subtract(
                    momentum, tf.math.multiply(half_step_size, gradient))

            new_momentum_tmp_rescaled = tf.math.multiply(
                momentum_inv_variance, new_momentum_tmp)
            new_position = tf.math.add(
                position, tf.math.multiply(step_size,
                                           new_momentum_tmp_rescaled))

            new_likelihood = make_likelihood(new_position)
            new_neg_log_prob = -tf.math.add(
                prior.log_prob(new_position),
                new_likelihood.log_prob(observations))
            new_gradient = tf.gradients(new_neg_log_prob, new_position)[0]
            new_momentum = tf.math.subtract(
                new_momentum_tmp, tf.math.multiply(half_step_size,
                                                   new_gradient))

            new_inv_temp = sched_inv_temp(init_inv_temp, t, num_steps)
            tempering_factor = tf.math.sqrt(
                tf.math.divide(inv_temp, new_inv_temp))
            new_tempered_momentum = tf.math.multiply(new_momentum,
                                                     tempering_factor)

            return t + 1, new_position, new_tempered_momentum, new_inv_temp

        initial_position, initial_log_v_prior_prob = \
            variational_prior.sample()
        momentum_loc = tf.zeros_like(initial_position)
        initial_tempered_scale_diag = tf.math.divide(
            momentum_scale_diag, tf.math.sqrt(init_inv_temp))
        initial_momentum_dist = tfd.MultivariateNormalDiag(
            loc=momentum_loc, scale_diag=initial_tempered_scale_diag)
        initial_momentum = initial_momentum_dist.sample(1)[0]
        with tf.control_dependencies([
                tf.assert_equal(tf.shape(initial_momentum),
                                tf.shape(initial_position))
        ]):
            initial_momentum = tf.identity(initial_momentum)
        t1 = tf.constant(1)

        _, position, momentum, inv_temp = tf.while_loop(
            cond, body,
            [t1, initial_position, initial_momentum, init_inv_temp])
        #
        # Static loop:
        #
        # t, position, momentum, inv_temp = \
        #     t1, initial_position, initial_momentum, init_inv_temp
        # for _ in range(num_steps):
        #     t, position, momentum, inv_temp = body(
        #         t, position, momentum, inv_temp
        #     )
        #
        final_position, final_momentum, final_inv_temp = \
            position, momentum, inv_temp
        with tf.control_dependencies([tf.assert_equal(final_inv_temp, 1.0)]):
            final_position = tf.identity(final_position)

        final_momentum_dist = tfd.MultivariateNormalDiag(
            loc=momentum_loc, scale_diag=momentum_scale_diag)
        final_likelihood = make_likelihood(final_position)

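        # Log importance weight: log p(x, v) - log q(x, v), with the Jacobian
        # of the momentum tempering folded into log q.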
        log_jacobian = tf.math.multiply(
            tf.math.divide(util.float(flat_num_dims), 2.0),
            tf.math.log(init_inv_temp))
        log_p = tf.math.add(
            tf.math.add(prior.log_prob(final_position),
                        final_likelihood.log_prob(observations)),
            final_momentum_dist.log_prob(final_momentum))
        log_q = tf.math.subtract(
            tf.math.add(initial_log_v_prior_prob,
                        initial_momentum_dist.log_prob(initial_momentum)),
            log_jacobian)
        return final_position, tf.math.subtract(log_p, log_q)