Example 1
        def __call__(self, msg):
            alpha = msg["alpha"]  # lod-tensor (batch_size, num_heads)
            # attn_drop, num_heads, hidden_size and concat are captured from the
            # enclosing scope; F is assumed to be paddle.fluid.
            if attn_drop:
                old_h = alpha
                # dropout probability fed at run time; float32 so it can be
                # compared against the float samples drawn below
                dropout = F.data(name='attn_drop', shape=[1], dtype="float32")
                # one uniform sample per row of alpha
                u = L.uniform_random(shape=L.cast(L.shape(alpha)[:1], 'int64'),
                                     min=0.,
                                     max=1.)
                keeped = L.cast(u > dropout, dtype="float32")  # 1 = keep, 0 = drop
                self_attn_mask = L.scale(x=keeped,
                                         scale=10000.0,
                                         bias=-1.0,
                                         bias_after_scale=False)
                n_head_self_attn_mask = L.stack(x=[self_attn_mask] * num_heads,
                                                axis=1)
                n_head_self_attn_mask.stop_gradient = True
                alpha = n_head_self_attn_mask + alpha
                alpha = L.lod_reset(alpha, old_h)

            h = msg["v"]
            alpha = paddle_helper.sequence_softmax(alpha)

            self.alpha = alpha
            old_h = h
            h = h * alpha
            h = L.lod_reset(h, old_h)
            h = L.sequence_pool(h, "sum")

            if concat:
                h = L.reshape(h, [-1, num_heads * hidden_size])
            else:
                h = L.reduce_mean(h, dim=1)
            return h
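
The branch above silences dropped rows by adding a large negative bias to the attention logits before the softmax. Below is a minimal standalone sketch of that additive-mask trick using paddle.fluid 1.x ops; the shapes, the 0.1 drop rate, and all variable names are illustrative, not part of the original code.

import paddle.fluid.layers as L

logits = L.uniform_random(shape=[8, 4], min=-1., max=1.)   # stand-in attention scores (8 rows, 4 heads)
u = L.uniform_random(shape=[8], min=0., max=1.)
keep = L.cast(u > 0.1, dtype="float32")                    # 1 = keep row, 0 = drop row
mask = L.scale(keep, scale=10000.0, bias=-1.0,
               bias_after_scale=False)                     # (keep - 1) * 10000: 0 or -10000
mask = L.stack([mask] * 4, axis=1)                         # repeat across the 4 heads
probs = L.softmax(logits + mask)                           # dropped rows get ~zero weight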
Example 2
def create_model(args, config):
    """Create model for given model configuration."""
    logging.info('building model')
    graph_wrapper = GraphWrapper(name="graph",
                                 node_feat=[('atom_type', [None, 1], "int64"),
                                            ('chirality_tag', [None, 1], "int64")],
                                 edge_feat=[('bond_type', [None, 1], "int64"),
                                            ('bond_direction', [None, 1], "int64")])

    # NOTE: both masks have shape [num_nodes, num_graphs]; batch size = num_graphs
    pos_mask = L.data(name='pos_mask',
                      shape=[-1, args.batch_size],
                      dtype='float32')
    neg_mask = L.data(name='neg_mask',
                      shape=[-1, args.batch_size],
                      dtype='float32')

    encoder = GINEncoder(config)
    global_repr, patch_summary = encoder.forward(graph_wrapper)

    global_D = FF(encoder.embedding_dim)
    local_D = FF(encoder.embedding_dim)
    g_enc = global_D.forward(global_repr)
    l_enc = local_D.forward(patch_summary)

    res = L.matmul(l_enc, g_enc, transpose_y=True)
    E_pos = get_positive_expectation(res * pos_mask,
                                     config['measure'],
                                     average=False)
    E_pos = L.reduce_sum(E_pos) / graph_wrapper.num_nodes
    E_neg = get_negative_expectation(res * neg_mask,
                                     config['measure'],
                                     average=False)
    E_neg = L.reduce_sum(E_neg) / (graph_wrapper.num_nodes *
                                   (graph_wrapper.num_graph - 1))
    local_global_loss = E_neg - E_pos

    if config['prior']:
        prior_D = PriorDiscriminator(encoder.embedding_dim)
        prior = L.uniform_random([args.batch_size, encoder.embedding_dim],
                                 min=0.0,
                                 max=1.0)
        term_1 = L.reduce_mean(L.log(prior_D.forward(prior)))
        term_2 = L.reduce_mean(L.log(1.0 - prior_D.forward(global_repr)))
        prior_loss = -(term_1 + term_2) * config['gamma']
    else:
        prior_loss = 0

    total_loss = local_global_loss + prior_loss

    keys = ['loss', 'graph_wrapper', 'encoder', 'graph_emb']
    Agent = namedtuple('Agent', keys)
    return Agent(loss=total_loss,
                 graph_wrapper=graph_wrapper,
                 encoder=encoder,
                 graph_emb=global_repr)
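
The pos_mask/neg_mask placeholders are expected to mark, for every node, the graph it belongs to (positives) versus all other graphs in the batch (negatives), matching the [num_nodes, num_graphs] note above. A hypothetical NumPy helper along those lines; the function name and the graph_lod-style offset argument are assumptions, not part of the source.

import numpy as np

def build_masks(graph_lod):
    """graph_lod is the node offset list [0, n1, n1 + n2, ...]."""
    num_graphs = len(graph_lod) - 1
    num_nodes = graph_lod[-1]
    pos_mask = np.zeros([num_nodes, num_graphs], dtype="float32")
    for g in range(num_graphs):
        pos_mask[graph_lod[g]:graph_lod[g + 1], g] = 1.0   # node belongs to graph g
    neg_mask = 1.0 - pos_mask
    return pos_mask, neg_mask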
Example 3
    def forward(self, *items):
        """Forward network"""
        if self.training and self.p > 0:
            # one keep/drop decision per position (first two dims) for each input
            masks = [
                layers.uniform_random(shape=x.shape[:2], min=0, max=1) >=
                self.p for x in items
            ]
            masks = [layers.cast(x, 'float32') for x in masks]
            # layers.elementwise_add takes two operands, so this forward expects
            # exactly two input tensors as written
            total = layers.elementwise_add(*masks)
            # rescale kept inputs to compensate for dropped ones; the max with
            # ones avoids division by zero when everything at a position is dropped
            scale = len(items) / layers.elementwise_max(
                total, layers.ones_like(total))
            masks = [mask * scale for mask in masks]
            items = [
                item * layers.unsqueeze(mask, axes=[-1])
                for item, mask in zip(items, masks)
            ]
        return items
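
The same shared-scale dropout, rewritten as a standalone dygraph sketch to make the two-tensor assumption explicit; the function name, shapes, and drop rate are illustrative and assume paddle.fluid 1.x dygraph mode.

import numpy as np
import paddle.fluid as fluid
import paddle.fluid.layers as layers

def paired_dropout(x, y, p=0.33):
    mask_x = layers.cast(layers.uniform_random(shape=x.shape[:2], min=0, max=1) >= p, "float32")
    mask_y = layers.cast(layers.uniform_random(shape=y.shape[:2], min=0, max=1) >= p, "float32")
    total = layers.elementwise_add(mask_x, mask_y)
    scale = 2 / layers.elementwise_max(total, layers.ones_like(total))  # avoid division by zero
    return (x * layers.unsqueeze(mask_x * scale, axes=[-1]),
            y * layers.unsqueeze(mask_y * scale, axes=[-1]))

with fluid.dygraph.guard():
    word = fluid.dygraph.to_variable(np.random.rand(4, 10, 32).astype("float32"))
    tag = fluid.dygraph.to_variable(np.random.rand(4, 10, 32).astype("float32"))
    word, tag = paired_dropout(word, tag)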
Example 4
    def __init__(self, graph_wrapper, dropout, keep_self_loop=True):
        super(DropEdgeWrapper, self).__init__()

        # Copy node feature tensors from the wrapped graph
        for key, value in graph_wrapper.node_feat.items():
            self.node_feat_tensor_dict[key] = value

        self._num_nodes = graph_wrapper.num_nodes
        self._graph_lod = graph_wrapper.graph_lod
        self._num_graph = graph_wrapper.num_graph

        # Randomly drop edges with probability `dropout`
        src, dst = graph_wrapper.edges
        u = L.uniform_random(shape=L.cast(L.shape(src), 'int64'),
                             min=0.,
                             max=1.)

        # Avoid an empty edge set: if every edge was dropped, keep all of them
        keeped = L.cast(u > dropout, dtype="float32")
        self._num_edges = L.reduce_sum(L.cast(keeped, "int32"))
        keeped = keeped + L.cast(self._num_edges == 0, dtype="float32")

        if keep_self_loop:
            self_loop = L.cast(src == dst, dtype="float32")
            keeped = keeped + self_loop

        keeped = (keeped > 0.5)
        src = paddle_helper.masked_select(src, keeped)
        dst = paddle_helper.masked_select(dst, keeped)
        src.stop_gradient = True
        dst.stop_gradient = True
        self._edges_src = src
        self._edges_dst = dst

        for key, value in graph_wrapper.edge_feat.items():
            self.edge_feat_tensor_dict[key] = paddle_helper.masked_select(
                value, keeped)

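        # Build the receive index: unique destination nodes, the cumulative
        # offsets of their incoming edges, and the per-node in-degree.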
        self._edge_uniq_dst, _, uniq_count = L.unique_with_counts(
            dst, dtype="int32")
        self._edge_uniq_dst.stop_gradient = True
        last = L.reduce_sum(uniq_count, keep_dim=True)
        uniq_count = L.cumsum(uniq_count, exclusive=True)
        self._edge_uniq_dst_count = L.concat([uniq_count, last])
        self._edge_uniq_dst_count.stop_gradient = True
        self._indegree = get_degree(self._edges_dst, self._num_nodes)
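
A hypothetical call site, assuming `gw` is a PGL graph wrapper that was built elsewhere:

dropped_gw = DropEdgeWrapper(gw, dropout=0.2, keep_self_loop=True)
# The wrapper exposes the same tensors (edges, node features, in-degree, ...), so it can
# be passed to a graph convolution in place of the original wrapper during training.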
Example 5
def build_edges(num_nodes, input_mask, max_seqlen):
    edges = L.range(start=0, end=num_nodes, step=1, dtype="int32")
    all_edges = []
    # Window
    filter_func = lambda x, y: select_edges(x, y, input_mask, num_nodes,
                                            max_seqlen)

    all_edges.append(filter_func(edges - 1, edges))  # window: previous token
    all_edges.append(filter_func(edges + 1, edges))  # window: next token
    all_edges.append(filter_func(edges, edges))  # self-loop

    # Global attention: assume [CLS] is the first token of each sequence.

    # vertical cls-window attention
    # integer division maps every token to the [CLS] position of its own sequence
    cls_position = edges / max_seqlen * max_seqlen
    all_edges.append(filter_func(cls_position, edges))

    # horizontal cls attention
    all_edges.append(filter_func(edges, cls_position))

    # Random attention: each token receives edges from two random positions in its own sequence
    for _ in range(2):
        rand_edge = L.floor(
            L.uniform_random(min=0, max=1, shape=[num_nodes]) *
            L.cast(max_seqlen, dtype="float32"))
        rand_edge = L.cast(rand_edge, dtype="int32") + cls_position
        all_edges.append(filter_func(rand_edge, edges))

    if len(all_edges) > 1:
        src = L.concat([s for s, d in all_edges], 0)
        dst = L.concat([d for s, d in all_edges], 0)
    else:
        src = all_edges[0][0]
        dst = all_edges[0][1]

    # sort edges
    sorted_src, sorted_dst = uniq_edges(src, dst, num_nodes)
    return sorted_src, sorted_dst
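
The cls_position line relies on truncating integer division, so every token index maps to the first position ([CLS]) of its own sequence. A NumPy stand-in with concrete numbers, purely for illustration:

import numpy as np

max_seqlen = 4
edges = np.arange(8, dtype="int32")              # two sequences of length 4, flattened
cls_position = edges // max_seqlen * max_seqlen
print(cls_position)                              # [0 0 0 0 4 4 4 4]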
Example 6
    def get_mask(x, p):
        """Generate an inverted-dropout mask with the same shape as the input."""
        mask = layers.uniform_random(shape=x.shape, min=0, max=1) >= p
        mask = layers.cast(mask, 'float32')
        # scale the kept entries by 1 / (1 - p) so the expected value is unchanged
        mask = mask / (1 - p)
        return mask
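
A hypothetical dygraph usage, assuming get_mask is reachable as a plain function or static method and that `layers` is paddle.fluid.layers in its module:

import numpy as np
import paddle.fluid as fluid

with fluid.dygraph.guard():
    x = fluid.dygraph.to_variable(np.random.rand(4, 8).astype("float32"))
    y = x * get_mask(x, p=0.1)   # zeroed entries are dropped, the rest scaled by 1/(1-p)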