def recv_func(self, message):
    """Gated multi-head attention aggregation over each node's incoming edges.

    Expects `message` to carry per-edge LoD tensors under the keys
    'dst_node_feat', 'src_node_feat', 'feat_gate', 'alpha', 'feat_value',
    grouped (via LoD) by destination node.
    Returns a node-level tensor of shape [N, D1 + heads * hidden_size_v]:
    the center-node feature concatenated with the gated attention output.
    Shape comments: E = num edges, N = num nodes, M = self.heads,
    D2 = self.hidden_size_v.
    """
    # Per-edge copies of the destination (center) and source (neighbor) features.
    dst_feat = message['dst_node_feat']
    src_feat = message['src_node_feat']
    # Sequence pooling groups edges by destination node:
    # x = center-node feature, z = mean feature of its neighbors.
    x = fluid.layers.sequence_pool(dst_feat, 'average')
    z = fluid.layers.sequence_pool(src_feat, 'average')
    # Per-head sigmoid gate from [center, max-pooled gate feature, neighbor mean].
    feat_gate = message['feat_gate']
    g_max = fluid.layers.sequence_pool(feat_gate, 'max')
    g = fluid.layers.concat([x, g_max, z], axis=1)
    g = fluid.layers.fc(g, self.heads, bias_attr=False, act="sigmoid")
    # softmax
    # Normalize attention coefficients within each destination node's edges.
    alpha = message['alpha']
    alpha = paddle_helper.sequence_softmax(alpha)  # E * M
    feat_value = message['feat_value']  # E * (M * D2)
    # Keep the original tensor: reshape drops the LoD, so it is restored
    # from `old` below before the sum-pool.
    old = feat_value
    feat_value = fluid.layers.reshape(
        feat_value, [-1, self.heads, self.hidden_size_v])  # E * M * D2
    # Broadcast-multiply per-edge, per-head attention over the value vectors.
    feat_value = fluid.layers.elementwise_mul(feat_value, alpha, axis=0)
    feat_value = fluid.layers.reshape(
        feat_value, [-1, self.heads * self.hidden_size_v])  # E * (M * D2)
    feat_value = fluid.layers.lod_reset(feat_value, old)
    # Sum the weighted values over each node's incoming edges.
    feat_value = fluid.layers.sequence_pool(feat_value, 'sum')  # N * (M * D2)
    feat_value = fluid.layers.reshape(
        feat_value, [-1, self.heads, self.hidden_size_v])  # N * M * D2
    # Scale each head's output by its gate, flatten heads, and prepend
    # the center-node feature (residual-style concat).
    output = fluid.layers.elementwise_mul(feat_value, g, axis=0)
    output = fluid.layers.reshape(
        output, [-1, self.heads * self.hidden_size_v])  # N * (M * D2)
    output = fluid.layers.concat([x, output], axis=1)
    return output
def __call__(self, msg): alpha = msg["alpha"] # lod-tensor (batch_size, num_heads) if attn_drop: old_h = alpha dropout = F.data(name='attn_drop', shape=[1], dtype="int64") u = L.uniform_random(shape=L.cast(L.shape(alpha)[:1], 'int64'), min=0., max=1.) keeped = L.cast(u > dropout, dtype="float32") self_attn_mask = L.scale(x=keeped, scale=10000.0, bias=-1.0, bias_after_scale=False) n_head_self_attn_mask = L.stack(x=[self_attn_mask] * num_heads, axis=1) n_head_self_attn_mask.stop_gradient = True alpha = n_head_self_attn_mask + alpha alpha = L.lod_reset(alpha, old_h) h = msg["v"] alpha = paddle_helper.sequence_softmax(alpha) self.alpha = alpha old_h = h h = h * alpha h = L.lod_reset(h, old_h) h = L.sequence_pool(h, "sum") if concat: h = L.reshape(h, [-1, num_heads * hidden_size]) else: h = L.reduce_mean(h, dim=1) return h
def recv_func(message):
    """Aggregate neighbor features separately for each of two node types.

    For every type id in {0, 1}, the matching edges get a softmax over
    their attention column while non-matching edges are masked out; the
    two pooled results are concatenated along the feature axis.
    """
    node_type = message["nt"]
    scores = message["att"]
    feats = message["h"]

    pooled = []
    for type_id in range(2):
        # 1.0 where the edge's node type matches type_id, else 0.0.
        type_mask = L.cast(node_type == type_id, dtype="float32")
        # Push non-matching entries toward -inf before the softmax.
        rel_score = scores[:, type_id:type_id + 1] + (1 - type_mask) * -10000
        rel_score = paddle_helper.sequence_softmax(rel_score)
        # Zero out non-matching edges entirely, then sum per node.
        pooled.append(L.sequence_pool(feats * rel_score * type_mask, "sum"))
    return L.concat(pooled, -1)
def reduce_attention(msg):
    """Sum values weighted by softmax-normalized multi-head attention.

    `num_heads`, `hidden_size`, `attn_drop`, and `is_test` come from the
    enclosing scope. Returns one summed vector per destination node.
    """
    raw_h = msg["h"]
    # Normalize logits within each destination node's group of edges.
    weights = paddle_helper.sequence_softmax(msg["alpha"])
    weights = L.reshape(weights, [-1, num_heads, 1])
    if attn_drop > 1e-15:
        weights = L.dropout(
            weights,
            dropout_prob=attn_drop,
            is_test=is_test,
            dropout_implementation="upscale_in_train")
    # Per-head broadcast multiply, then flatten the heads back out.
    weighted = L.reshape(raw_h, [-1, num_heads, hidden_size]) * weights
    weighted = L.reshape(weighted, [-1, num_heads * hidden_size])
    # reshape discards the LoD; copy it back from the untouched input.
    weighted = L.lod_reset(weighted, raw_h)
    return L.sequence_pool(weighted, "sum")
def recv_score_v_spmm(msg):
    """Softmax(score) x value reduction, per head, summed per node.

    `msg` carries per-edge LoD tensors 'score' and 'value'; `n_head` and
    `dropout_rate` come from the enclosing scope.
    """
    score = msg["score"]
    # Normalize scores within each destination node's group of edges.
    score = paddle_helper.sequence_softmax(score)
    # NOTE(review): is_test=False is hardcoded, so this dropout stays
    # active at inference time as well — confirm that is intentional.
    score = layers.dropout(
        score,
        dropout_prob=dropout_rate,
        dropout_implementation="upscale_in_train",
        is_test=False)
    # One weight per head, broadcast over the per-head value slice.
    score = L.reshape(score, [-1, n_head, 1])
    _v = msg["value"]
    _new_v = L.reshape(_v, [-1, n_head, _v.shape[-1] // n_head])
    _new_v = _new_v * score
    _new_v = L.reshape(_new_v, [-1, _v.shape[-1]])
    # reshape drops the LoD; restore it from the original value tensor.
    _new_v = L.lod_reset(_new_v, _v)
    return L.sequence_pool(_new_v, "sum")
def reduce_attention(msg):
    """Attention-weighted sum of values, concatenated or averaged over heads.

    `attn_drop`, `is_test`, `concat`, `num_heads`, and `hidden_size` come
    from the enclosing scope.
    """
    values = msg["v"]
    # Normalize logits within each destination node's group of edges.
    weights = paddle_helper.sequence_softmax(msg["alpha"])
    if attn_drop > 1e-15:
        weights = fluid.layers.dropout(
            weights,
            dropout_prob=attn_drop,
            is_test=is_test,
            dropout_implementation="upscale_in_train")
    weighted = values * weights
    # Multiplication drops the LoD; copy it back before the sum-pool.
    weighted = fluid.layers.lod_reset(weighted, values)
    pooled = fluid.layers.sequence_pool(weighted, "sum")
    if concat:
        # Flatten the heads into one vector per node.
        return fluid.layers.reshape(pooled, [-1, num_heads * hidden_size])
    # Otherwise average over the head dimension.
    return fluid.layers.reduce_mean(pooled, dim=1)
def recv_func(message):
    """Gated multi-head attention aggregation over incoming edges.

    `heads` and `hidden_size_v` come from the enclosing scope. Returns a
    node-level tensor: the center-node feature concatenated with the
    per-head gated attention output.
    Shape legend: E = num edges, N = num nodes, M = heads, D2 = hidden_size_v.
    """
    # Per-edge copies of the endpoint features.
    dst_on_edge = message['dst_node_feat']
    src_on_edge = message['src_node_feat']
    # Pool per destination node: center feature and mean neighbor feature.
    center = fluid.layers.sequence_pool(dst_on_edge, 'average')
    neigh_mean = fluid.layers.sequence_pool(src_on_edge, 'average')
    # Per-head sigmoid gate from [center, max gate feature, neighbor mean].
    gate_max = fluid.layers.sequence_pool(message['feat_gate'], 'max')
    gate = fluid.layers.concat([center, gate_max, neigh_mean], axis=1)
    gate = fluid.layers.fc(gate, heads, bias_attr=False, act="sigmoid")
    # softmax
    attn = paddle_helper.sequence_softmax(message['alpha'])  # E * M
    values = message['feat_value']  # E * (M * D2)
    lod_source = values  # reshape drops the LoD; keep the original to restore it
    weighted = fluid.layers.reshape(
        values, [-1, heads, hidden_size_v])  # E * M * D2
    weighted = fluid.layers.elementwise_mul(weighted, attn, axis=0)
    weighted = fluid.layers.reshape(
        weighted, [-1, heads * hidden_size_v])  # E * (M * D2)
    weighted = fluid.layers.lod_reset(weighted, lod_source)
    # Sum the weighted values over each node's incoming edges.
    pooled = fluid.layers.sequence_pool(weighted, 'sum')  # N * (M * D2)
    pooled = fluid.layers.reshape(
        pooled, [-1, heads, hidden_size_v])  # N * M * D2
    # Apply the per-head gate, flatten heads, and prepend the center feature.
    gated = fluid.layers.elementwise_mul(pooled, gate, axis=0)
    gated = fluid.layers.reshape(
        gated, [-1, heads * hidden_size_v])  # N * (M * D2)
    return fluid.layers.concat([center, gated], axis=1)
def recv_func(message):
    """Gated multi-head attention aggregation over each node's incoming edges.

    `heads` and `hidden_size_v` come from the enclosing scope.
    Shape legend: E = num edges, N = num nodes, M = heads, D2 = hidden_size_v.
    """
    # Feature of the destination node of each edge.
    dst_feat = message['dst_node_feat']
    # Feature of the source node of each edge.
    src_feat = message['src_node_feat']
    # Each center node's own feature (edges are LoD-grouped by dst node).
    x = L.sequence_pool(dst_feat, 'average')
    # Mean feature of each center node's neighbors.
    z = L.sequence_pool(src_feat, 'average')
    # Compute the per-head sigmoid gate.
    feat_gate = message['feat_gate']
    g_max = L.sequence_pool(feat_gate, 'max')
    g = L.concat([x, g_max, z], axis=1)
    g = L.fc(g, heads, bias_attr=False, act="sigmoid")
    # softmax
    alpha = message['alpha']
    alpha = paddle_helper.sequence_softmax(alpha)  # E * M
    feat_value = message['feat_value']  # E * (M * D2)
    # Keep the original tensor: reshape drops the LoD, restored below.
    old = feat_value
    feat_value = L.reshape(feat_value, [-1, heads, hidden_size_v])  # E * M * D2
    # Broadcast-multiply per-edge, per-head attention over the values.
    feat_value = L.elementwise_mul(feat_value, alpha, axis=0)
    feat_value = L.reshape(feat_value, [-1, heads * hidden_size_v])  # E * (M * D2)
    feat_value = L.lod_reset(feat_value, old)
    # Sum the weighted values over each node's incoming edges.
    feat_value = L.sequence_pool(feat_value, 'sum')  # N * (M * D2)
    feat_value = L.reshape(feat_value, [-1, heads, hidden_size_v])  # N * M * D2
    # Scale each head's output by its gate, then flatten heads.
    output = L.elementwise_mul(feat_value, g, axis=0)
    output = L.reshape(output, [-1, heads * hidden_size_v])  # N * (M * D2)
    # Prepend the center-node feature (residual-style concat).
    output = L.concat([x, output], axis=1)
    return output
def recv_func(message):
    """Gated multi-head attention aggregation over incoming edges.

    `heads` and `hidden_size_v` come from the enclosing scope. Returns the
    center-node feature concatenated with the gated attention output.
    """
    # Per-edge endpoint features (LoD groups edges by destination node).
    edge_dst = message['dst_node_feat']
    edge_src = message['src_node_feat']
    x = fluid.layers.sequence_pool(edge_dst, 'average')  # center-node feature
    z = fluid.layers.sequence_pool(edge_src, 'average')  # mean neighbor feature

    # Per-head sigmoid gate from [center, max gate feature, neighbor mean].
    g_max = fluid.layers.sequence_pool(message['feat_gate'], 'max')
    g = fluid.layers.fc(
        fluid.layers.concat([x, g_max, z], axis=1),
        heads,
        bias_attr=False,
        act='sigmoid')

    # Softmax of the attention coefficients within each node's edge group.
    alpha = paddle_helper.sequence_softmax(message['alpha'])

    val = message['feat_value']
    lod_holder = val  # reshape drops the LoD; keep the original to restore it
    val = fluid.layers.reshape(val, [-1, heads, hidden_size_v])
    val = fluid.layers.elementwise_mul(val, alpha, axis=0)
    val = fluid.layers.reshape(val, [-1, heads * hidden_size_v])
    val = fluid.layers.lod_reset(val, lod_holder)

    # Sum over each node's incoming edges, gate per head, flatten heads.
    val = fluid.layers.sequence_pool(val, 'sum')
    val = fluid.layers.reshape(val, [-1, heads, hidden_size_v])
    out = fluid.layers.elementwise_mul(val, g, axis=0)
    out = fluid.layers.reshape(out, [-1, heads * hidden_size_v])
    return fluid.layers.concat([x, out], axis=1)
def softmax_agg_inside(msg):
    """Softmax-weighted sum aggregation.

    Weights each message by its sequence-softmax score (with temperature
    `beta` from the enclosing scope) and sums per destination node.
    """
    weights = paddle_helper.sequence_softmax(msg, beta)
    return fluid.layers.sequence_pool(msg * weights, "sum")