def infer(self, inputs, outputs):
    """Run model inference.

    Only generation is supported for now.
    """
    if self.do_generation:
        return self.generator.inference(self, inputs, outputs)
    else:
        tgt_logits = self._calc_logits(outputs["enc_out"], inputs["tgt_idx"])
        tgt_lm_loss = layers.softmax_with_cross_entropy(
            logits=tgt_logits, label=inputs["tgt_label"])

        # Scatter-add the per-token loss and token counts back onto
        # per-example vectors, keyed by the example index in tgt_idx[:, 0].
        lm_loss = layers.fill_constant_batch_size_like(
            outputs["enc_out"], [-1], self.dtype, 0)
        lm_loss = layers.scatter(
            lm_loss, inputs["tgt_idx"][:, 0], tgt_lm_loss[:, 0], overwrite=False)
        tokens_num = layers.fill_constant_batch_size_like(
            outputs["enc_out"], [-1], self.dtype, 0)
        tgt_tokens_num = layers.fill_constant_batch_size_like(
            tgt_lm_loss, [-1], self.dtype, 1)
        tokens_num = layers.scatter(
            tokens_num, inputs["tgt_idx"][:, 0], tgt_tokens_num, overwrite=False)

        predictions = {
            "lm_loss": lm_loss,
            "tokens_num": tokens_num,
            "data_id": inputs["data_id"]
        }
        return predictions
def label_embed_input(self, feature): label = F.data(name="label", shape=[None, 1], dtype="int64") label_idx = F.data(name='label_idx', shape=[None], dtype="int64") label = L.reshape(label, shape=[-1]) label = L.gather(label, label_idx, overwrite=False) lay_norm_attr = F.ParamAttr( initializer=F.initializer.ConstantInitializer(value=1)) lay_norm_bias = F.ParamAttr( initializer=F.initializer.ConstantInitializer(value=0)) feature = L.layer_norm(feature, name='layer_norm_feature_input1', param_attr=lay_norm_attr, bias_attr=lay_norm_bias) embed_attr = F.ParamAttr( initializer=F.initializer.NormalInitializer(loc=0.0, scale=1.0)) embed = F.embedding(input=label, size=(self.out_size, self.embed_size), param_attr=embed_attr) lay_norm_attr = F.ParamAttr( initializer=F.initializer.ConstantInitializer(value=1)) lay_norm_bias = F.ParamAttr( initializer=F.initializer.ConstantInitializer(value=0)) embed = L.layer_norm(embed, name='layer_norm_feature_input2', param_attr=lay_norm_attr, bias_attr=lay_norm_bias) embed = L.relu(embed) feature_label = L.gather(feature, label_idx, overwrite=False) feature_label = feature_label + embed feature = L.scatter(feature, label_idx, feature_label, overwrite=True) return feature
def get_degree(edge, num_nodes):
    """Compute node degrees by scatter-adding a one for each edge endpoint."""
    init_output = L.fill_constant(shape=[num_nodes], value=0, dtype="float32")
    init_output.stop_gradient = True
    final_output = L.scatter(init_output,
                             edge,
                             L.full_like(edge, 1, dtype="float32"),
                             overwrite=False)
    return final_output
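# A minimal usage sketch for get_degree (not from the original source). It
# assumes a PaddlePaddle 1.x static-graph setup with
# `import paddle.fluid.layers as L` already in scope for get_degree; the toy
# edge array below is hypothetical.
import numpy as np
import paddle.fluid as fluid

prog = fluid.Program()
with fluid.program_guard(prog):
    # One destination node id per edge; the toy graph has 4 nodes.
    edge = fluid.layers.data(
        name="edge", shape=[4], append_batch_size=False, dtype="int64")
    degree = get_degree(edge, num_nodes=4)

exe = fluid.Executor(fluid.CPUPlace())
out, = exe.run(prog,
               feed={"edge": np.array([0, 1, 1, 3], dtype="int64")},
               fetch_list=[degree])
# Expected: [1., 2., 0., 1.] -- node 1 appears twice, node 2 never.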
def sag_pool(gw, feature, ratio, graph_id, dataset, name, activation=L.tanh):
    """Implementation of self-attention graph pooling (SAGPool).

    This is an implementation of the paper SELF-ATTENTION GRAPH POOLING
    (https://arxiv.org/pdf/1904.08082.pdf).

    Args:
        gw: Graph wrapper object.
        feature: A tensor with shape (num_nodes, feature_size).
        ratio: The pooling ratio of nodes we want to select.
        graph_id: The graphs that the nodes belong to.
        dataset: To differentiate the FRANKENSTEIN dataset from other datasets.
        name: The name of the SAGPool layer.
        activation: The activation function.

    Return:
        new_feature: A tensor with shape (num_nodes, feature_size); the
            features of unselected nodes are masked with zeros.
        ratio_length: The number of selected nodes of each graph.
    """
    if dataset == "FRANKENSTEIN":
        gcn_ = gcn
    else:
        gcn_ = norm_gcn

    # One-dimensional attention score per node, produced by a GCN layer.
    score = gcn_(gw=gw,
                 feature=feature,
                 hidden_size=1,
                 activation=None,
                 norm=gw.node_feat["norm"],
                 name=name)
    score = L.squeeze(score, axes=[])
    perm, ratio_length = topk_pool(gw, score, graph_id, ratio)

    # Build a 0/1 node mask: scatter ones at the selected indices.
    mask = L.zeros_like(score)
    mask = L.cast(mask, dtype="float32")
    updates = L.ones_like(perm)
    updates = L.cast(updates, dtype="float32")
    mask = L.scatter(mask, perm, updates)
    new_feature = L.elementwise_mul(feature, mask, axis=0)

    # Scale the surviving features by the activated attention score.
    temp_score = activation(score)
    new_feature = L.elementwise_mul(new_feature, temp_score, axis=0)
    return new_feature, ratio_length
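# A short worked trace of the masking step in sag_pool (hypothetical numbers,
# not from the original source): with 4 nodes and perm = [0, 3], the scatter
# writes ones into positions 0 and 3, so mask = [1, 0, 0, 1]. Multiplying
# feature by mask along axis 0 zeroes the rows of the unselected nodes 1 and
# 2, and the final multiply rescales every remaining row by activation(score).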
def label_embed_input(self, feature): label = F.data(name="label", shape=[None, self.out_size], dtype="int64") label_idx = F.data(name='label_idx', shape=[None], dtype="int64") label = L.gather(label, label_idx, overwrite=False) label = L.cast(label, dtype="float32") label_feat = self.embed_input(label, "label_feat") feature_label = L.gather(feature, label_idx, overwrite=False) feature_label = feature_label + label_feat feature = L.scatter(feature, label_idx, feature_label, overwrite=True) return feature
def fluid_sequence_scatter(input, index, value):
    """
    args:
        input: 1-level LoDTensor
        index: 1-d tensor of per-sequence indices
        value: scalar
    return:
        output = input
        output[index + offset] = value
        lod_set(output, input)
    """
    offset = fluid_sequence_get_offset(input)
    offset_index = index + offset
    offset_index.stop_gradient = True
    updates = fluid.layers.fill_constant_batch_size_like(
        input, shape=input.shape, value=value, dtype=input.dtype)
    output = layers.scatter(input, layers.cast(offset_index, 'int32'), updates)
    return layers.lod_reset(output, input)
def test_scatter(self):
    program = Program()
    with program_guard(program):
        x = layers.data(
            name='x', shape=[3, 3], append_batch_size=False, dtype='float32')
        idx = layers.data(
            name='idx', shape=[2], append_batch_size=False, dtype='int32')
        updates = layers.data(
            name='updates',
            shape=[2, 3],
            append_batch_size=False,
            dtype='float32')
        out = layers.scatter(input=x, index=idx, updates=updates)
        self.assertIsNotNone(out)

    print(str(program))
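# The test above only builds the program. A standalone sketch of actually
# running the same graph (assuming PaddlePaddle 1.x on CPU; toy values are
# hypothetical) shows the default overwrite behaviour of layers.scatter:
# rows named in idx are replaced by the matching rows of updates.
import numpy as np
import paddle.fluid as fluid
import paddle.fluid.layers as layers

prog = fluid.Program()
with fluid.program_guard(prog):
    x = layers.data(name='x', shape=[3, 3], append_batch_size=False, dtype='float32')
    idx = layers.data(name='idx', shape=[2], append_batch_size=False, dtype='int32')
    updates = layers.data(name='updates', shape=[2, 3], append_batch_size=False, dtype='float32')
    out = layers.scatter(input=x, index=idx, updates=updates)

exe = fluid.Executor(fluid.CPUPlace())
res, = exe.run(prog,
               feed={
                   "x": np.zeros((3, 3), dtype="float32"),
                   "idx": np.array([2, 0], dtype="int32"),
                   "updates": np.arange(6, dtype="float32").reshape(2, 3),
               },
               fetch_list=[out])
# res[2] == [0., 1., 2.], res[0] == [3., 4., 5.], and row 1 stays all zeros.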
def scatter_max(input, index, updates):
    """Scatter max updates to input by given index.

    Applies sparse max updates to the input variable.

    Args:
        input: Input tensor to be updated.
        index: Slice index.
        updates: Must have same type as input.

    Return:
        Same type and shape as input.
    """
    output = L.scatter(input, index, updates, mode='max')
    return output
def scatter_add(input, index, updates):
    """Scatter add updates to input by given index.

    Adds sparse updates to input variables.

    Args:
        input: Input tensor to be updated.
        index: Slice index.
        updates: Must have same type as input.

    Return:
        Same type and shape as input.
    """
    output = L.scatter(input, index, updates, overwrite=False)
    return output
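# A minimal usage sketch for scatter_add (not from the original source). It
# assumes a PaddlePaddle 1.x static-graph setup with
# `import paddle.fluid.layers as L`; the toy values are hypothetical.
import numpy as np
import paddle.fluid as fluid
import paddle.fluid.layers as L

prog = fluid.Program()
with fluid.program_guard(prog):
    base = L.data(name="base", shape=[4], append_batch_size=False, dtype="float32")
    index = L.data(name="index", shape=[3], append_batch_size=False, dtype="int64")
    upd = L.data(name="upd", shape=[3], append_batch_size=False, dtype="float32")
    summed = scatter_add(base, index, upd)

exe = fluid.Executor(fluid.CPUPlace())
out, = exe.run(prog,
               feed={
                   "base": np.zeros([4], dtype="float32"),
                   "index": np.array([1, 1, 3], dtype="int64"),
                   "upd": np.array([1.0, 2.0, 5.0], dtype="float32"),
               },
               fetch_list=[summed])
# With overwrite=False the repeated index accumulates: out == [0., 3., 0., 5.]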
def label_embed_input(self, feature): label = F.data(name="label_all", shape=[None, 1], dtype="int64") label_idx = F.data(name='label_idx', shape=[None], dtype="int64") label = L.reshape(label, shape=[-1]) # label = L.index_select(label, label_idx) label = L.gather(label, label_idx, overwrite=False) embed_attr = F.ParamAttr( initializer=F.initializer.NormalInitializer(loc=0.0, scale=1.0)) embed = F.embedding(input=label, size=(self.out_size, self.embed_size), param_attr=embed_attr) # feature_label = L.index_select(feature, label_idx) feature_label = L.gather(feature, label_idx, overwrite=False) feature_label = feature_label + embed feature = L.scatter(feature, label_idx, feature_label, overwrite=True) return feature
def fluid_sequence_scatter(input, index, offset, updates):
    """
    args:
        input: 1-level LoDTensor, 'float32' only
        index: 1-d tensor of per-sequence indices, 'int32' only
        offset: the same shape and dtype as index
        updates: shape (len(index),) + input.shape[1:]
    return:
        output = input
        output[index + offset] = updates
        lod_set(output, input)
    """
    # assert input.lod_level == 1, input
    assert index.shape == offset.shape
    assert input.shape[1:] == updates.shape[1:]
    new_index = index + offset
    new_index.stop_gradient = True
    output = layers.scatter(input, new_index, updates)
    return layers.lod_reset(output, input)
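# A worked trace of the intended semantics of fluid_sequence_scatter above
# (hypothetical values, not from the original source): suppose the LoD is
# [[2, 3]], i.e. two sequences occupying rows 0-1 and 2-4 of `input`. With
# per-sequence positions index = [1, 0] and sequence start rows
# offset = [0, 2], new_index = [1, 2], so rows 1 and 2 of the output are
# replaced by the corresponding rows of `updates` while every other row and
# the LoD stay unchanged.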
def recv(dst, uniq_dst, bucketing_index, msg, reduce_function, num_nodes,
         num_edges):
    """Recv message from given msg to dst nodes.
    """
    if reduce_function == "sum":
        if isinstance(msg, dict):
            raise TypeError("The message for build-in function"
                            " should be Tensor not dict.")

        try:
            out_dim = msg.shape[-1]
            init_output = L.fill_constant(
                shape=[num_nodes, out_dim], value=0, dtype=msg.dtype)
            init_output.stop_gradient = False
            empty_msg_flag = L.cast(num_edges > 0, dtype=msg.dtype)
            msg = msg * empty_msg_flag
            output = paddle_helper.scatter_add(init_output, dst, msg)
            return output
        except TypeError as e:
            warnings.warn(
                "scatter_add is not supported with paddle version <= 1.5")

            def sum_func(message):
                return L.sequence_pool(message, "sum")

            reduce_function = sum_func

    # General path: used for user-defined reduce functions, and as the
    # fallback for "sum" on Paddle versions without scatter_add. Messages are
    # bucketed per destination, reduced, then scattered to the unique
    # destination nodes.
    bucketed_msg = op.nested_lod_reset(msg, bucketing_index)
    output = reduce_function(bucketed_msg)
    output_dim = output.shape[-1]
    empty_msg_flag = L.cast(num_edges > 0, dtype=output.dtype)
    output = output * empty_msg_flag
    init_output = L.fill_constant(
        shape=[num_nodes, output_dim], value=0, dtype=output.dtype)
    init_output.stop_gradient = True
    final_output = L.scatter(init_output, uniq_dst, output)
    return final_output
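# A short worked trace of the scatter_add branch above (hypothetical values):
# with dst = [0, 2, 0] and three message rows m0, m1, m2, scatter_add
# accumulates m0 + m2 into row 0 of the zero-initialised output and m1 into
# row 2, i.e. each destination node receives the sum of its incoming messages.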
def topk_pool(gw, score, graph_id, ratio):
    """Implementation of topk pooling, where k means pooling ratio.

    Args:
        gw: Graph wrapper object.
        score: The attention score of all nodes, which is used to select
            important nodes.
        graph_id: The graphs that the nodes belong to.
        ratio: The pooling ratio of nodes we want to select.

    Return:
        perm: The index of nodes we choose.
        ratio_length: The selected node numbers of each graph.
    """
    graph_lod = gw.graph_lod
    graph_nodes = gw.num_nodes
    num_graph = gw.num_graph

    num_nodes = L.ones(shape=[graph_nodes], dtype="float32")
    num_nodes = L.lod_reset(num_nodes, graph_lod)
    num_nodes_per_graph = L.sequence_pool(num_nodes, pool_type='sum')
    max_num_nodes = L.reduce_max(num_nodes_per_graph, dim=0)
    max_num_nodes = L.cast(max_num_nodes, dtype="int32")

    index = L.arange(0, gw.num_nodes, dtype="int64")
    offset = L.gather(graph_lod, graph_id, overwrite=False)
    index = (index - offset) + (graph_id * max_num_nodes)
    index.stop_gradient = True

    # padding
    dense_score = L.fill_constant(
        shape=[num_graph * max_num_nodes], dtype="float32", value=-999999)
    index = L.reshape(index, shape=[-1])
    dense_score = L.scatter(dense_score, index, updates=score)
    num_graph = L.cast(num_graph, dtype="int32")
    dense_score = L.reshape(dense_score, shape=[num_graph, max_num_nodes])

    # record the sorted index
    _, sort_index = L.argsort(dense_score, axis=-1, descending=True)

    # recover the index range
    graph_lod = graph_lod[:-1]
    graph_lod = L.reshape(graph_lod, shape=[-1, 1])
    graph_lod = L.cast(graph_lod, dtype="int64")
    sort_index = L.elementwise_add(sort_index, graph_lod, axis=-1)
    sort_index = L.reshape(sort_index, shape=[-1, 1])

    # use sequence_slice to choose selected node index
    pad_lod = L.arange(
        0, (num_graph + 1) * max_num_nodes, step=max_num_nodes, dtype="int32")
    sort_index = L.lod_reset(sort_index, pad_lod)
    ratio_length = L.ceil(num_nodes_per_graph * ratio)
    ratio_length = L.cast(ratio_length, dtype="int64")
    ratio_length = L.reshape(ratio_length, shape=[-1, 1])
    offset = L.zeros(shape=[num_graph, 1], dtype="int64")
    choose_index = L.sequence_slice(
        input=sort_index, offset=offset, length=ratio_length)
    perm = L.reshape(choose_index, shape=[-1])
    return perm, ratio_length
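# A worked trace of the padding trick in topk_pool (hypothetical numbers, not
# from the original source): with two graphs of 2 and 3 nodes,
# max_num_nodes = 3 and the slot mapping (index - offset) + graph_id *
# max_num_nodes places the nodes in slots [0, 1] and [3, 4, 5] of a dense
# buffer of length 2 * 3 initialised to -999999. Slot 2 keeps the padding
# value, so the per-row descending argsort never ranks a padding slot ahead
# of a real node, and adding back each graph's start offset recovers the
# original node ids of the top-scoring nodes.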