def forward(self, is_test=False):
    """
    Build the network.
    """
    substruct_graph_wrapper = GraphWrapper(
        name="graph",
        node_feat=[('atom_type', [None, 1], "int64"),
                   ('chirality_tag', [None, 1], "int64")],
        edge_feat=[('bond_type', [None, 1], "int64"),
                   ('bond_direction', [None, 1], "int64")])
    context_graph_wrapper = GraphWrapper(
        name="context_graph",
        node_feat=[('atom_type', [None, 1], "int64"),
                   ('chirality_tag', [None, 1], "int64")],
        edge_feat=[('bond_type', [None, 1], "int64"),
                   ('bond_direction', [None, 1], "int64")])
    substruct_center_idx = layers.data(
        name="substruct_center_idx", shape=[-1, 1], dtype="int64")
    context_overlap_idx = layers.data(
        name="context_overlap_idx", shape=[-1, 1], dtype="int64")
    context_overlap_lod = layers.data(
        name="context_overlap_lod", shape=[1, -1], dtype="int32")
    context_cycle_index = layers.data(
        name="context_cycle_index", shape=[-1, 1], dtype="int64")

    substruct_node_repr = self.substruct_model.forward(
        substruct_graph_wrapper, is_test=is_test)
    substruct_repr = layers.gather(substruct_node_repr, substruct_center_idx)

    context_node_repr = self.context_model.forward(
        context_graph_wrapper, is_test=is_test)
    context_overlap_repr = layers.gather(context_node_repr,
                                         context_overlap_idx)
    context_repr = layers.sequence_pool(
        layers.lod_reset(context_overlap_repr, context_overlap_lod),
        self.context_pooling)
    # Negative contexts are obtained by cycling the batch.
    neg_context_repr = layers.gather(context_repr, context_cycle_index)

    pred_pos = layers.reduce_sum(substruct_repr * context_repr, 1)
    pred_neg = layers.reduce_sum(substruct_repr * neg_context_repr, 1)

    # `x * 0.0 + c` builds a constant label tensor shaped like the logits.
    label_pos = pred_pos * 0.0 + 1.0
    label_pos.stop_gradient = True
    label_neg = pred_neg * 0.0
    label_neg.stop_gradient = True

    loss = layers.sigmoid_cross_entropy_with_logits(x=pred_pos, label=label_pos) + \
        layers.sigmoid_cross_entropy_with_logits(x=pred_neg, label=label_neg)
    loss = layers.reduce_mean(loss)

    self.substruct_graph_wrapper = substruct_graph_wrapper
    self.context_graph_wrapper = context_graph_wrapper
    self.loss = loss
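# All of the snippets in this section route through
# sigmoid_cross_entropy_with_logits. A minimal numpy sketch (names are ours,
# not from any snippet here) of the element-wise binary cross-entropy it
# computes, in the numerically stable form, together with the `x * 0.0 + c`
# constant-label trick used above:
import numpy as np

def sigmoid_bce_with_logits(x, z):
    """Stable BCE on logits: max(x, 0) - x*z + log(1 + exp(-|x|))."""
    return np.maximum(x, 0.0) - x * z + np.log1p(np.exp(-np.abs(x)))

logits = np.array([2.0, -1.0, 0.5])
label_pos = logits * 0.0 + 1.0  # constant all-ones label, same shape as logits
label_neg = logits * 0.0        # constant all-zeros label
assert np.allclose(sigmoid_bce_with_logits(logits, label_pos),
                   -np.log(1.0 / (1.0 + np.exp(-logits))))
assert np.allclose(sigmoid_bce_with_logits(logits, label_neg),
                   -np.log(1.0 - 1.0 / (1.0 + np.exp(-logits))))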
import math

import paddle.fluid as fluid
import paddle.fluid.layers as l


def node2vec_model(graph, hidden_size=16, neg_num=5):
    pyreader = l.py_reader(
        capacity=70,
        shapes=[[-1, 1, 1], [-1, 1, 1], [-1, neg_num, 1]],
        dtypes=['int64', 'int64', 'int64'],
        lod_levels=[0, 0, 0],
        name='train',
        use_double_buffer=True)

    embed_init = fluid.initializer.UniformInitializer(low=-1.0, high=1.0)
    weight_init = fluid.initializer.TruncatedNormal(
        scale=1.0 / math.sqrt(hidden_size))

    src, pos, negs = l.read_file(pyreader)

    embed_src = l.embedding(
        input=src,
        size=[graph.num_nodes, hidden_size],
        param_attr=fluid.ParamAttr(name='content', initializer=embed_init))
    weight_pos = l.embedding(
        input=pos,
        size=[graph.num_nodes, hidden_size],
        param_attr=fluid.ParamAttr(name='weight', initializer=weight_init))
    weight_negs = l.embedding(
        input=negs,
        size=[graph.num_nodes, hidden_size],
        param_attr=fluid.ParamAttr(name='weight', initializer=weight_init))

    pos_logits = l.matmul(embed_src, weight_pos,
                          transpose_y=True)  # [batch_size, 1, 1]
    neg_logits = l.matmul(embed_src, weight_negs,
                          transpose_y=True)  # [batch_size, 1, neg_num]

    ones_label = pos_logits * 0. + 1.
    ones_label.stop_gradient = True
    pos_loss = l.sigmoid_cross_entropy_with_logits(pos_logits, ones_label)

    zeros_label = neg_logits * 0.
    zeros_label.stop_gradient = True
    neg_loss = l.sigmoid_cross_entropy_with_logits(neg_logits, zeros_label)

    loss = (l.reduce_mean(pos_loss) + l.reduce_mean(neg_loss)) / 2
    return pyreader, loss
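# A quick shape check (numpy stand-in, illustrative values only) for the two
# matmul calls above: fluid's matmul with transpose_y=True batches over the
# leading axis, giving one logit per (src, candidate) pair.
import numpy as np

batch, hidden, neg_num = 4, 16, 5
embed_src = np.random.randn(batch, 1, hidden)
weight_pos = np.random.randn(batch, 1, hidden)
weight_negs = np.random.randn(batch, neg_num, hidden)

pos_logits = embed_src @ weight_pos.transpose(0, 2, 1)   # [batch, 1, 1]
neg_logits = embed_src @ weight_negs.transpose(0, 2, 1)  # [batch, 1, neg_num]
assert pos_logits.shape == (batch, 1, 1)
assert neg_logits.shape == (batch, 1, neg_num)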
def create_model(args, config, graph_label):
    """Create a model for the given model configuration."""
    logging.info('building model')
    graph_wrapper = GraphWrapper(
        name="graph",
        node_feat=[('atom_type', [None, 1], "int64"),
                   ('chirality_tag', [None, 1], "int64")],
        edge_feat=[('bond_type', [None, 1], "int64"),
                   ('bond_direction', [None, 1], "int64")])

    encoder = GINEncoder(config)
    global_repr, patch_summary = encoder.forward(graph_wrapper)

    hid = L.fc(global_repr, config['hidden_size'], act='relu',
               name='finetune_fc1')
    hid = L.fc(hid, config['hidden_size'], act='relu', name='finetune_fc2')

    # Classifier head on top of the two finetune layers.
    logits = L.fc(hid, args.num_tasks, name="finetune_fc3")
    loss = L.sigmoid_cross_entropy_with_logits(x=logits, label=graph_label)
    loss = L.reduce_mean(loss)
    pred = L.sigmoid(logits)

    keys = ['loss', 'graph_wrapper', 'encoder', 'graph_emb', 'pred']
    Agent = namedtuple('Agent', keys)
    return Agent(
        loss=loss,
        graph_wrapper=graph_wrapper,
        encoder=encoder,
        graph_emb=global_repr,
        pred=pred)
def forward(self, is_test=False):
    """Build the supervised pretraining network."""
    graph_wrapper = GraphWrapper(
        name="graph",
        node_feat=[('atom_type', [None, 1], "int64"),
                   ('chirality_tag', [None, 1], "int64")],
        edge_feat=[('bond_type', [None, 1], "int64"),
                   ('bond_direction', [None, 1], "int64")])
    supervised_label = layers.data(
        name="supervised_label", shape=[None, self.task_num], dtype="float32")
    # `valid` masks out missing labels so they do not contribute to the loss.
    valid = layers.data("valid", shape=[None, self.task_num], dtype="float32")

    node_repr = self.gnn_model.forward(graph_wrapper, is_test=is_test)
    graph_repr = pgl.layers.graph_pooling(graph_wrapper, node_repr,
                                          self.pool_type)
    logits = layers.fc(graph_repr, size=self.task_num,
                       name="pretrain_supervised_fc")
    loss = layers.sigmoid_cross_entropy_with_logits(x=logits,
                                                    label=supervised_label)
    loss = layers.reduce_sum(loss * valid) / layers.reduce_sum(valid)

    self.graph_wrapper = graph_wrapper
    self.loss = loss
def link_predict_model(num_nodes,
                       hidden_size=16,
                       name='link_predict_task',
                       binary_op_type="Weighted-L2"):
    pyreader = l.py_reader(
        capacity=70,
        shapes=[[-1, 1], [-1, 1], [-1, 1]],
        dtypes=['int64', 'int64', 'int64'],
        lod_levels=[0, 0, 0],
        name=name + '_pyreader',
        use_double_buffer=True)
    u, v, label = l.read_file(pyreader)

    u_embed = l.embedding(
        input=u,
        size=[num_nodes, hidden_size],
        param_attr=fluid.ParamAttr(name='content'))
    v_embed = l.embedding(
        input=v,
        size=[num_nodes, hidden_size],
        param_attr=fluid.ParamAttr(name='content'))
    # The embeddings are frozen; only the classifier on top is trained.
    u_embed.stop_gradient = True
    v_embed.stop_gradient = True

    edge_embed = binary_op(u_embed, v_embed, binary_op_type)
    logit = l.fc(input=edge_embed, size=1)
    loss = l.sigmoid_cross_entropy_with_logits(logit, l.cast(label, 'float32'))
    loss = l.reduce_mean(loss)
    prob = l.sigmoid(logit)
    return pyreader, loss, prob, label
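# `binary_op` is defined elsewhere in the source; a plausible sketch based on
# the standard node2vec edge-feature operators (the actual implementation may
# differ):
import numpy as np

def binary_op(u, v, op_type):
    if op_type == "Average":
        return (u + v) / 2.0
    elif op_type == "Hadamard":
        return u * v
    elif op_type == "Weighted-L1":
        return np.abs(u - v)
    elif op_type == "Weighted-L2":
        return (u - v) ** 2
    raise ValueError("unknown binary_op_type: %s" % op_type)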
def bce_Loss(input, target):
    loss = layers.sigmoid_cross_entropy_with_logits(
        x=input, label=target, ignore_index=-1, normalize=True)
    loss = layers.reduce_sum(loss)
    return loss
def __call__(self, x, label):
    out = sigmoid_cross_entropy_with_logits(x, label)
    if self.reduction == 'sum':
        return reduce_sum(out)
    elif self.reduction == 'mean':
        return reduce_mean(out)
    else:
        return out
def test_sigmoid_cross_entropy(self):
    program = Program()
    with program_guard(program):
        dat = layers.data(name='data', shape=[10], dtype='float32')
        lbl = layers.data(name='label', shape=[10], dtype='float32')
        self.assertIsNotNone(
            layers.sigmoid_cross_entropy_with_logits(x=dat, label=lbl))
    print(str(program))
def train_program(self):
    label = F.data(name="label", shape=[None, 112], dtype="int64")
    train_idx = F.data(name='train_idx', shape=[None], dtype="int64")
    prediction = L.gather(self.out_feat, train_idx, overwrite=False)
    label = L.gather(label, train_idx, overwrite=False)
    label = L.cast(label, dtype="float32")
    cost = L.sigmoid_cross_entropy_with_logits(x=prediction, label=label)
    avg_cost = L.mean(cost)
    self.avg_cost = avg_cost
def forward(self):
    """forward"""
    src, dst = L.read_file(self.pyreader)

    if self.is_sparse:
        # Sparse mode uses 2-dim input.
        src = L.reshape(src, [-1, 1])
        dst = L.reshape(dst, [-1, 1])

    src_embed = split_embedding(src, self.num_nodes, self.hidden_size,
                                self.embed_init, "weight", self.num_part,
                                self.is_sparse)
    dst_embed = split_embedding(dst, self.num_nodes, self.hidden_size,
                                self.embed_init, "weight", self.num_part,
                                self.is_sparse)

    if self.is_sparse:
        src_embed = L.reshape(
            src_embed, [-1, 1, self.num_featuers, self.hidden_size])
        dst_embed = L.reshape(
            dst_embed,
            [-1, self.neg_num + 1, self.num_featuers, self.hidden_size])

    src_embed = L.reduce_mean(src_embed, 2)
    dst_embed = L.reduce_mean(dst_embed, 2)

    logits = L.matmul(src_embed, dst_embed,
                      transpose_y=True)  # [batch_size, 1, neg_num+1]

    pos_label = L.fill_constant_batch_size_like(logits, [-1, 1, 1],
                                                "float32", 1)
    neg_label = L.fill_constant_batch_size_like(logits,
                                                [-1, 1, self.neg_num],
                                                "float32", 0)
    label = L.concat([pos_label, neg_label], -1)

    # Upweight the single positive logit so it balances the neg_num negatives.
    pos_weight = L.fill_constant_batch_size_like(logits, [-1, 1, 1],
                                                 "float32", self.neg_num)
    neg_weight = L.fill_constant_batch_size_like(logits,
                                                 [-1, 1, self.neg_num],
                                                 "float32", 1)
    weight = L.concat([pos_weight, neg_weight], -1)
    weight.stop_gradient = True
    label.stop_gradient = True

    loss = L.sigmoid_cross_entropy_with_logits(logits, label)
    loss = loss * weight
    loss = L.reduce_mean(loss)
    loss = loss * ((self.neg_num + 1) / 2 / self.neg_num)
    loss.persistable = True
    self.loss = loss
    return loss
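# Why the final rescale by (neg_num + 1) / 2 / neg_num: with the positive
# logit weighted neg_num and each of the neg_num negatives weighted 1, the
# total weight per sample is 2 * neg_num, while reduce_mean divides by the
# neg_num + 1 logits. The rescale turns the element mean into a proper
# weighted average. A numpy check with made-up loss values:
import numpy as np

neg_num, batch = 5, 8
bce = np.full((batch, 1, neg_num + 1), 0.7)  # stand-in per-logit BCE values
weight = np.array([float(neg_num)] + [1.0] * neg_num)

mean_then_rescaled = (bce * weight).mean() * ((neg_num + 1) / 2 / neg_num)
weighted_average = (bce * weight).sum() / (batch * 2 * neg_num)
assert np.isclose(mean_then_rescaled, weighted_average)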
def build_model(self):
    node_features = self.graph_wrapper.node_feat["feat"]

    output = self.gcn(gw=self.graph_wrapper,
                      feature=node_features,
                      hidden_size=self.hidden_size,
                      activation="relu",
                      norm=self.graph_wrapper.node_feat["norm"],
                      name="gcn_layer_1")
    output1 = output
    output = self.gcn(gw=self.graph_wrapper,
                      feature=output,
                      hidden_size=self.hidden_size,
                      activation="relu",
                      norm=self.graph_wrapper.node_feat["norm"],
                      name="gcn_layer_2")
    output2 = output
    output = self.gcn(gw=self.graph_wrapper,
                      feature=output,
                      hidden_size=self.hidden_size,
                      activation="relu",
                      norm=self.graph_wrapper.node_feat["norm"],
                      name="gcn_layer_3")
    # Concatenate the outputs of all three GCN layers before pooling.
    output = L.concat(input=[output1, output2, output], axis=-1)

    output, ratio_length = sag_pool(gw=self.graph_wrapper,
                                    feature=output,
                                    ratio=self.pooling_ratio,
                                    graph_id=self.graph_id,
                                    dataset=self.args.dataset_name,
                                    name="sag_pool_1")
    output = L.lod_reset(output, self.graph_wrapper.graph_lod)
    cat1 = L.sequence_pool(output, "sum")
    ratio_length = L.cast(ratio_length, dtype="float32")
    cat1 = L.elementwise_div(cat1, ratio_length, axis=-1)
    cat2 = L.sequence_pool(output, "max")
    output = L.concat(input=[cat2, cat1], axis=-1)

    output = L.fc(output, size=self.hidden_size, act="relu")
    output = L.dropout(output, dropout_prob=self.dropout_ratio)
    output = L.fc(output, size=self.hidden_size // 2, act="relu")
    output = L.fc(output, size=self.num_classes, act=None,
                  param_attr=fluid.ParamAttr(name="final_fc"))

    self.labels = L.cast(self.labels, dtype="float32")
    loss = L.sigmoid_cross_entropy_with_logits(x=output, label=self.labels)
    self.loss = L.mean(loss)
    pred = L.sigmoid(output)
    self.pred = L.argmax(x=pred, axis=-1)
    correct = L.equal(self.pred, self.labels_1dim)
    correct = L.cast(correct, dtype="int32")
    self.correct = L.reduce_sum(correct)
def forward(self, is_test=False):
    """
    Define the forward network and set the layer options.

    The graph wrapper creates data holders for the graph, so that node
    attributes and features become :code:`fluid.layers.data` tensors. The
    :code:`to_feed` interface helps convert :code:`Graph` data into a
    :code:`feed_dict`.

    Args:
        name: The graph data prefix, here "graph".
        node_feat: A list of tuples that describe the node feature tensors.
            Each tuple must be (name, shape, dtype), and the first dimension
            of the shape must be unknown (-1 or None); alternatively,
            :code:`Graph.node_feat_info()` returns suitable settings.
        edge_feat: A list of tuples that describe the edge feature tensors.
            Each tuple must be (name, shape, dtype), and the first dimension
            of the shape must be unknown (-1 or None); alternatively,
            :code:`Graph.edge_feat_info()` returns suitable settings.
    """
    graph_wrapper = GraphWrapper(
        name="graph",
        node_feat=[('atom_type', [None, 1], "int64"),
                   ('chirality_tag', [None, 1], "int64")],
        edge_feat=[('bond_type', [None, 1], "int64"),
                   ('bond_direction', [None, 1], "int64")])
    finetune_label = layers.data(
        name="finetune_label", shape=[None, self.num_tasks], dtype="float32")
    # `valid` masks out missing labels so they do not contribute to the loss.
    valid = layers.data("valid", shape=[None, self.num_tasks], dtype="float32")

    node_repr = self.gnn_model.forward(graph_wrapper, is_test=is_test)
    graph_repr = pgl.layers.graph_pooling(graph_wrapper, node_repr,
                                          self.pool_type)
    logits = layers.fc(graph_repr, size=self.num_tasks, name="finetune_fc")
    loss = layers.sigmoid_cross_entropy_with_logits(x=logits,
                                                    label=finetune_label)
    loss = layers.reduce_sum(loss * valid) / layers.reduce_sum(valid)
    pred = layers.sigmoid(logits)

    self.graph_wrapper = graph_wrapper
    self.loss = loss
    self.pred = pred
    self.finetune_label = finetune_label
def pointwise_loss(self):
    """Pointwise model."""
    self.logits = L.reduce_sum(self.query_repr * self.poi_repr, -1)
    self.score = L.sigmoid(self.logits)
    self.loss = L.sigmoid_cross_entropy_with_logits(
        L.reshape(self.logits, [-1, 1]), L.reshape(self.labels, [-1, 1]))
    auc_label = L.cast(self.labels, dtype="int64")
    auc_label.stop_gradient = True
    _, self.batch_auc, _ = L.auc(
        L.reshape(self.score, [-1, 1]), L.reshape(auc_label, [-1, 1]))
    self.metrics = [L.reduce_mean(self.loss), self.batch_auc]
    self.loss = L.reduce_mean(self.loss)
def __init__(self, x, y, y_aux, cfg):
    self.program = fluid.default_main_program().clone()
    with fluid.program_guard(self.program):
        model = ACGAN(cfg.latent_size, cfg.num_classes)
        self.fake, self.aux = model.network_d(x, name='d')
        self.fake_loss = layers.sigmoid_cross_entropy_with_logits(
            x=self.fake, label=y)
        self.aux_loss = layers.softmax_with_cross_entropy(
            logits=self.aux, label=y_aux)
        self.unweighted_loss = layers.reduce_sum(self.fake_loss +
                                                 self.aux_loss)
        self.infer_program = self.program.clone(for_test=True)

        # We don't want the discriminator to also maximize the classification
        # accuracy of the auxiliary classifier on generated images, so we
        # don't train the discriminator to produce class labels for generated
        # images (see https://openreview.net/forum?id=rJXTf9Bxg).
        # To preserve the sum of sample weights for the auxiliary classifier,
        # we assign a sample weight of 2 to the real images.
        fake_loss_weight = layers.ones(
            shape=[cfg.batch_size * 2, 1], dtype='float32')
        aux_loss_weight_zeros = layers.zeros(
            shape=[cfg.batch_size, 1], dtype='float32')
        aux_loss_weight_twos = layers.fill_constant(
            shape=[cfg.batch_size, 1], value=2.0, dtype='float32')
        aux_loss_weight = layers.concat(
            [aux_loss_weight_twos, aux_loss_weight_zeros])
        self.fake_loss = layers.elementwise_mul(self.fake_loss,
                                                fake_loss_weight)
        self.aux_loss = layers.elementwise_mul(self.aux_loss, aux_loss_weight)
        self.loss = layers.reduce_sum(self.fake_loss) + layers.reduce_sum(
            self.aux_loss)

        vars = []
        for var in self.program.list_vars():
            if fluid.io.is_parameter(var) and var.name.startswith("d"):
                vars.append(var.name)
        optimizer = fluid.optimizer.Adam(
            learning_rate=cfg.adam_lr, beta1=cfg.adam_beta_1, name="net_d")
        optimizer.minimize(self.loss, parameter_list=vars)
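# A numpy illustration of the sample-weight construction described in the
# comment above (shapes only, not part of the source): real images get weight
# 2 in the auxiliary loss and generated images get 0, so the total sample
# weight matches that of the adversarial loss.
import numpy as np

batch_size = 4
# The discriminator batch stacks real images first, then generated ones.
fake_loss_weight = np.ones((batch_size * 2, 1), dtype=np.float32)
aux_loss_weight = np.concatenate([
    np.full((batch_size, 1), 2.0, dtype=np.float32),  # real images count double
    np.zeros((batch_size, 1), dtype=np.float32),      # generated images excluded
])
assert aux_loss_weight.sum() == fake_loss_weight.sum() == 2 * batch_size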
def build_model(self):
    node_features = self.atom_encoder(self.graph_wrapper.node_feat['feat'])
    edge_features = self.bond_encoder(self.graph_wrapper.edge_feat['feat'])

    self._enc_out = self.node_repr_encode(node_features, edge_features)

    logits = L.fc(self._enc_out,
                  self.args.num_class,
                  act=None,
                  param_attr=F.ParamAttr(name="final_fc"))

    loss = L.sigmoid_cross_entropy_with_logits(x=logits, label=self.labels)
    # `unmask` zeroes out entries without labels before averaging.
    loss = loss * self.unmask
    self.loss = L.reduce_sum(loss) / L.reduce_sum(self.unmask)
    self.pred = L.sigmoid(logits)

    self._metrics = Metric(loss=self.loss)
def node_classify_model(graph, num_labels, hidden_size=16,
                        name='node_classify_task'):
    pyreader = l.py_reader(
        capacity=70,
        shapes=[[-1, 1], [-1, num_labels]],
        dtypes=['int64', 'float32'],
        lod_levels=[0, 0],
        name=name + '_pyreader',
        use_double_buffer=True)
    nodes, labels = l.read_file(pyreader)
    embed_nodes = l.embedding(
        input=nodes,
        size=[graph.num_nodes, hidden_size],
        param_attr=fluid.ParamAttr(name='content'))
    embed_nodes.stop_gradient = True
    logits = l.fc(input=embed_nodes, size=num_labels)
    loss = l.sigmoid_cross_entropy_with_logits(logits, labels)
    loss = l.reduce_mean(loss)
    prob = l.sigmoid(logits)
    topk = l.reduce_sum(labels, -1)
    return pyreader, loss, prob, labels, topk
def train(self, is_test=False):
    """
    Used for train/test with labels and the training loss.
    """
    graph_wrapper, logits = self.forward(is_test=is_test)

    finetune_label = layers.data(
        name="finetune_label", shape=[None, self.num_tasks], dtype="float32")
    valid = layers.data("valid", shape=[None, self.num_tasks], dtype="float32")

    loss = layers.sigmoid_cross_entropy_with_logits(x=logits,
                                                    label=finetune_label)
    loss = layers.reduce_sum(loss * valid) / layers.reduce_sum(valid)
    pred = layers.sigmoid(logits)

    self.graph_wrapper = graph_wrapper
    self.loss = loss
    self.pred = pred
    self.finetune_label = finetune_label
def node_classify_model(graph, num_labels, embed_dim=16,
                        name='node_classify_task'):
    """Build the node classification model.

    Args:
        graph: The :code:`Graph` data object.
        num_labels: The number of labels.
        embed_dim: The dimension of the embedding.
        name: The name of the model.
    """
    pyreader = l.py_reader(
        capacity=70,
        shapes=[[-1, 1], [-1, num_labels]],
        dtypes=['int64', 'float32'],
        lod_levels=[0, 0],
        name=name + '_pyreader',
        use_double_buffer=True)
    nodes, labels = l.read_file(pyreader)
    embed_nodes = l.embedding(
        input=nodes, size=[graph.num_nodes, embed_dim], param_attr='shared_w')
    embed_nodes.stop_gradient = True
    logits = l.fc(input=embed_nodes, size=num_labels)
    loss = l.sigmoid_cross_entropy_with_logits(logits, labels)
    loss = l.reduce_mean(loss)
    prob = l.sigmoid(logits)
    topk = l.reduce_sum(labels, -1)
    return {
        'pyreader': pyreader,
        'loss': loss,
        'prob': prob,
        'labels': labels,
        'topk': topk
    }
def __init__(self, sampled_labels, noise, trick, cfg):
    self.program = fluid.default_main_program().clone()
    with fluid.program_guard(self.program):
        model = ACGAN(cfg.latent_size, cfg.num_classes)
        self.fake_img = model.network_g(sampled_labels, noise, name='g')
        self.infer_program = self.program.clone(for_test=True)
        self.fake, self.aux = model.network_d(self.fake_img, name="d")
        self.fake_loss = layers.reduce_sum(
            layers.sigmoid_cross_entropy_with_logits(
                x=self.fake, label=trick))
        self.aux_loss = layers.reduce_sum(
            layers.softmax_with_cross_entropy(
                logits=self.aux, label=sampled_labels))
        self.loss = self.fake_loss + self.aux_loss

        vars = []
        for var in self.program.list_vars():
            if fluid.io.is_parameter(var) and var.name.startswith("g"):
                vars.append(var.name)
        optimizer = fluid.optimizer.Adam(
            learning_rate=cfg.adam_lr, beta1=cfg.adam_beta_1, name="net_g")
        optimizer.minimize(self.loss, parameter_list=vars)
def define_learn(self, obs, action, reward):
    """
    Update the policy model `self.model` with the policy gradient algorithm.
    `obs` is the model input.
    """
    tokens = action[0]
    adjvec = action[1]
    with fluid.unique_name.guard():
        [_, softmax, _, sigmoid] = self.model.policy(obs)

        # Negative log-probability of the categorical (token) actions.
        reshape_softmax = layers.reshape(
            softmax, [-1, self.model.parser_args.num_tokens])
        reshape_tokens = layers.reshape(tokens, [-1, 1])
        reshape_tokens.stop_gradient = True
        raw_neglogp_to = layers.softmax_with_cross_entropy(
            soft_label=False,
            logits=reshape_softmax,
            label=fluid.layers.cast(x=reshape_tokens, dtype="int64"))
        action_to_shape_sec = self.model.parser_args.num_nodes * 2
        neglogp_to = layers.reshape(
            fluid.layers.cast(raw_neglogp_to, dtype="float32"),
            [-1, action_to_shape_sec])

        # Negative log-probability of the Bernoulli (adjacency) actions.
        adjvec = layers.cast(x=adjvec, dtype='float32')
        neglogp_ad = layers.sigmoid_cross_entropy_with_logits(
            x=sigmoid, label=adjvec)

        neglogp = layers.elementwise_add(
            x=layers.reduce_sum(neglogp_to, dim=1),
            y=layers.reduce_sum(neglogp_ad, dim=1))

        # REINFORCE: minimize E[neglogp * reward].
        reward = layers.cast(reward, dtype="float32")
        cost = layers.reduce_mean(
            fluid.layers.elementwise_mul(x=neglogp, y=reward))
        optimizer = fluid.optimizer.Adam(learning_rate=self.lr)
        train_op = optimizer.minimize(cost)
    return cost
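# The objective above is plain REINFORCE over a mixed action space: softmax
# cross-entropy gives -log p(tokens), sigmoid cross-entropy gives
# -log p(adjvec), and their sum is scaled by the reward. A self-contained
# numpy sketch (all names and sizes are illustrative):
import numpy as np

rng = np.random.default_rng(0)

def log_softmax(x):
    x = x - x.max(axis=-1, keepdims=True)
    return x - np.log(np.exp(x).sum(axis=-1, keepdims=True))

num_steps, num_tokens, num_edges = 3, 4, 6
token_logits = rng.normal(size=(num_steps, num_tokens))  # categorical head
tokens = rng.integers(0, num_tokens, size=num_steps)     # sampled token actions
adj_logits = rng.normal(size=num_edges)                  # Bernoulli head
adjvec = rng.integers(0, 2, size=num_edges).astype(np.float64)
reward = 1.5

neglogp_to = -log_softmax(token_logits)[np.arange(num_steps), tokens].sum()
neglogp_ad = (np.maximum(adj_logits, 0.0) - adj_logits * adjvec
              + np.log1p(np.exp(-np.abs(adj_logits)))).sum()
cost = (neglogp_to + neglogp_ad) * reward  # REINFORCE surrogate, one sample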
def _is_backward_op(op):  # illustrative name; the original def is not shown
    return op_role_key in op.attr_names and \
        int(op.all_attrs()[op_role_key]) & int(OpRole.Backward)


avgw_list = []

# Define a custom main program and startup program.
main_program = fluid.Program()
start_program = fluid.Program()
with fluid.program_guard(main_program, start_program):
    # Build the network.
    slot = fluid.data('slot', [-1, 1], dtype='int64', lod_level=1)
    label = fluid.data('label', [-1, 1])
    emb = layers.embedding(slot, [5, 12])
    pool = layers.sequence_pool(emb, 'sum')
    logit = layers.fc(pool, 1)
    loss = layers.sigmoid_cross_entropy_with_logits(logit, label)
    avg_cost = layers.mean(loss)
    # Define the optimizer.
    sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.01)
    sgd_optimizer.minimize(avg_cost)

decay_var = layers.fill_constant(shape=[1], value=0.9, dtype='float32')
rev_decay_var = layers.fill_constant(shape=[1], value=0.1, dtype='float32')

block = main_program.global_block()
op_maker = core.op_proto_and_checker_maker
op_role_key = op_maker.kOpRoleAttrName()  # "op_role"
op_role_var_key = op_maker.kOpRoleVarAttrName()  # "op_role_var"
param2avg = []
for idx, op in list(enumerate(block.ops)):
def forward(self, x, y):
    return L.mean(L.sigmoid_cross_entropy_with_logits(x, y))
def forward(self):
    src, dsts = L.read_file(self.pyreader)

    if self.is_sparse:
        src = L.reshape(src, [-1, 1])
        dsts = L.reshape(dsts, [-1, 1])

    if self.num_part is not None and self.num_part != 1 \
            and not self.is_distributed:
        src_embed = distributed_embedding(
            src,
            self.num_nodes,
            self.embed_dim,
            self.embed_init,
            "weight",
            self.num_part,
            self.is_sparse,
            learning_rate=self.embedding_lr)
        dsts_embed = distributed_embedding(
            dsts,
            self.num_nodes,
            self.embed_dim,
            self.embed_init,
            "weight",
            self.num_part,
            self.is_sparse,
            learning_rate=self.embedding_lr)
    else:
        src_embed = L.embedding(
            src, (self.num_nodes, self.embed_dim),
            self.is_sparse,
            self.is_distributed,
            param_attr=F.ParamAttr(
                name="weight",
                learning_rate=self.embedding_lr,
                initializer=self.embed_init))
        dsts_embed = L.embedding(
            dsts, (self.num_nodes, self.embed_dim),
            self.is_sparse,
            self.is_distributed,
            param_attr=F.ParamAttr(
                name="weight",
                learning_rate=self.embedding_lr,
                initializer=self.embed_init))

    if self.is_sparse:
        src_embed = L.reshape(src_embed, [-1, 1, self.embed_dim])
        dsts_embed = L.reshape(dsts_embed,
                               [-1, self.neg_num + 1, self.embed_dim])

    logits = L.matmul(src_embed, dsts_embed,
                      transpose_y=True)  # [batch_size, 1, neg_num+1]

    pos_label = L.fill_constant_batch_size_like(logits, [-1, 1, 1],
                                                "float32", 1)
    neg_label = L.fill_constant_batch_size_like(logits,
                                                [-1, 1, self.neg_num],
                                                "float32", 0)
    label = L.concat([pos_label, neg_label], -1)

    pos_weight = L.fill_constant_batch_size_like(logits, [-1, 1, 1],
                                                 "float32", self.neg_num)
    neg_weight = L.fill_constant_batch_size_like(logits,
                                                 [-1, 1, self.neg_num],
                                                 "float32", 1)
    weight = L.concat([pos_weight, neg_weight], -1)
    weight.stop_gradient = True
    label.stop_gradient = True

    loss = L.sigmoid_cross_entropy_with_logits(logits, label)
    loss = loss * weight
    loss = L.reduce_mean(loss)
    loss = loss * ((self.neg_num + 1) / 2 / self.neg_num)
    loss.persistable = True
    self.loss = loss
    return loss
def forward(self):
    """forward"""
    src, dst = L.read_file(self.pyreader)
    src_id = L.slice(src, [0, 1, 2, 3], [0, 0, 0, 0],
                     [int(math.pow(2, 30)) - 1, 1, 1, 1])
    dst_id = L.slice(dst, [0, 1, 2, 3], [0, 0, 0, 0],
                     [int(math.pow(2, 30)) - 1, self.neg_num + 1, 1, 1])

    if self.is_sparse:
        # Sparse mode uses 2-dim input.
        src = L.reshape(src, [-1, 1])
        dst = L.reshape(dst, [-1, 1])

    # [b, 1, f, h]
    src_embed = split_embedding(src, self.num_nodes, self.hidden_size,
                                self.embed_init, "weight", self.num_part,
                                self.is_sparse)
    # [b, n+1, f, h]
    dst_embed = split_embedding(dst, self.num_nodes, self.hidden_size,
                                self.embed_init, "weight", self.num_part,
                                self.is_sparse)

    if self.is_sparse:
        src_embed = L.reshape(
            src_embed, [-1, 1, self.num_featuers, self.hidden_size])
        dst_embed = L.reshape(
            dst_embed,
            [-1, self.neg_num + 1, self.num_featuers, self.hidden_size])

    # Per-node softmax weights over the feature slots. [b, 1, 1, f]
    src_weight = L.softmax(
        L.embedding(src_id, [self.num_nodes, self.num_featuers],
                    param_attr=F.ParamAttr(name="alpha")))
    # [b, n+1, 1, f]
    dst_weight = L.softmax(
        L.embedding(dst_id, [self.num_nodes, self.num_featuers],
                    param_attr=F.ParamAttr(name="alpha")))

    # Weighted sum over the feature axis. [b, 1, h]
    src_sum = L.squeeze(L.matmul(src_weight, src_embed), axes=[2])
    # [b, n+1, h]
    dst_sum = L.squeeze(L.matmul(dst_weight, dst_embed), axes=[2])

    logits = L.matmul(src_sum, dst_sum,
                      transpose_y=True)  # [batch_size, 1, neg_num+1]

    pos_label = L.fill_constant_batch_size_like(logits, [-1, 1, 1],
                                                "float32", 1)
    neg_label = L.fill_constant_batch_size_like(logits,
                                                [-1, 1, self.neg_num],
                                                "float32", 0)
    label = L.concat([pos_label, neg_label], -1)

    pos_weight = L.fill_constant_batch_size_like(logits, [-1, 1, 1],
                                                 "float32", self.neg_num)
    neg_weight = L.fill_constant_batch_size_like(logits,
                                                 [-1, 1, self.neg_num],
                                                 "float32", 1)
    weight = L.concat([pos_weight, neg_weight], -1)
    weight.stop_gradient = True
    label.stop_gradient = True

    loss = L.sigmoid_cross_entropy_with_logits(logits, label)
    loss = loss * weight
    loss = L.reduce_mean(loss)
    loss = loss * ((self.neg_num + 1) / 2 / self.neg_num)
    loss.persistable = True
    self.loss = loss
    return loss
def loss(self, predictions, labels):
    per_example_loss = L.sigmoid_cross_entropy_with_logits(
        predictions, L.cast(labels, 'float32'))
    loss = L.reduce_mean(per_example_loss)
    return loss
def __call__(
        self,
        predictions,
        labels_pos_mask,         # Shape: [batch_size, 19248, 1]
        labels_neg_mask,         # Shape: [batch_size, 19248, 1]
        labels_allboxes_vector,  # Shape: [batch_size, 19248, 8]
        segment_t,               # list, Shape: [batch_size, 19248, 1]
        label_masks,
        labels_best_truth_idx,
        labels_pos_index,
        labels_pos_cid,          # Shape: [batch_size, 19248]
        labels_pos_cid2,         # Shape: [batch_size, 19248]
        priors,
        class_vectors,
        batch_size,
        use_maskiou=True,
        use_ce_loss=True,
        use_ghm_c_loss=False,
        use_focal_loss=False,
        use_ohem_loss=False):
    pred_allboxes_encode_x0y0x1y1 = predictions['loc']  # [batch_size, 19248, 4]
    pred_allboxes_conf = predictions['conf']            # [batch_size, 19248, 1+80]
    pred_allboxes_mask_coef = predictions['mask']       # [batch_size, 19248, num_protos=32]
    pred_proto = predictions['proto']                   # [batch_size, s4=138, s4=138, num_protos=32]
    pred_segm = predictions['segm']                     # [batch_size, num_classes=80, s8=69, s8=69]
    labels_allboxes_x0y0x1y1 = labels_allboxes_vector[:, :, 0:4]         # [batch_size, 19248, 4]
    labels_allboxes_decode_x0y0x1y1 = labels_allboxes_vector[:, :, 4:8]  # [batch_size, 19248, 4]
    losses = {}

    # 1. bbox loss (positives only), here as CIoU loss. A smooth-L1 variant
    # on the encoded boxes is also possible:
    # bbox_alpha = 1.5
    # bbox_loss = P.smooth_l1(
    #     P.reshape(pred_allboxes_encode_x0y0x1y1, (-1, 4)),
    #     P.reshape(labels_allboxes_x0y0x1y1, (-1, 4)))
    # bbox_loss = P.reshape(labels_pos_mask, (-1, 1)) * bbox_loss
    # bbox_loss = P.reduce_sum(bbox_loss) * bbox_alpha
    # losses['B'] = bbox_loss
    pred_x0y0x1y1 = []
    for idx in range(batch_size):
        temp = decode(pred_allboxes_encode_x0y0x1y1[idx], priors)
        pred_x0y0x1y1.append(temp)
    pred_x0y0x1y1 = P.concat(pred_x0y0x1y1,
                             axis=0)  # [batch_size*num_priors, 4]
    pred_x0y0x1y1 = P.reshape(
        pred_x0y0x1y1, (batch_size, -1, 4))  # [batch_size, num_priors, 4]

    ciou = P.reshape(
        self.bbox_ciou(pred_x0y0x1y1, labels_allboxes_decode_x0y0x1y1),
        (batch_size, -1, 1))  # (batch_size, num_priors, 1)

    # Per-box ciou_loss weight = 2 - (ground-truth area / image area).
    gt_area = (labels_allboxes_decode_x0y0x1y1[:, :, 2:3] -
               labels_allboxes_decode_x0y0x1y1[:, :, 0:1]) * \
              (labels_allboxes_decode_x0y0x1y1[:, :, 3:4] -
               labels_allboxes_decode_x0y0x1y1[:, :, 1:2])
    bbox_loss_scale = 2.0 - gt_area
    ciou_loss = labels_pos_mask * bbox_loss_scale * (1 - ciou)

    bbox_alpha = 1.5
    ciou_loss = P.reduce_sum(ciou_loss) * bbox_alpha
    losses['B'] = ciou_loss

    # 2. mask loss (positives only).
    mask_h = P.shape(pred_proto)[1]
    mask_w = P.shape(pred_proto)[2]
    loss_m = 0
    maskiou_t_list = []
    maskiou_net_input_list = []
    label_t_list = []
    for idx in range(batch_size):
        # e.g. [[0], [0], ..., [0]]: for each positive sample, the index (into
        # label_x0y0x1y1cid[idx]) of its best-matching gt. With a single gt,
        # all indices are 0.
        labels_pos_index[idx].stop_gradient = True
        cur_gt = P.gather(labels_best_truth_idx[idx],
                          labels_pos_index[idx])  # (?, 1)
        cur_gt.stop_gradient = True
        cur_x0y0x1y1 = P.gather(labels_allboxes_decode_x0y0x1y1[idx],
                                labels_pos_index[idx])  # (?, 4)

        proto_masks = pred_proto[idx]  # (138, 138, 32)
        # pred_allboxes_mask_coef: (batch_size, 19248, 32). Select the mask
        # coefficients predicted for the positive samples.
        proto_coef = P.gather(pred_allboxes_mask_coef[idx],
                              labels_pos_index[idx])  # (?, 32)

        # (?, 138, 138): the ground-truth masks matched by the positive
        # samples. When they all match the same gt, the same mask repeats.
        mask_t = P.gather(label_masks[idx], cur_gt)  # (?, 138, 138)
        # (?, ): the ground-truth class ids matched by the positive samples;
        # likewise, the same cid repeats when they share one gt.
        label_t = P.gather(labels_pos_cid[idx], labels_pos_index[idx])  # (?, )

        # Size: (138, 138, ?) = prototypes @ coefficients^T
        pred_masks = P.matmul(proto_masks, proto_coef, transpose_y=True)
        pred_masks = P.sigmoid(pred_masks)  # sigmoid activation
        pred_masks = crop(pred_masks, cur_x0y0x1y1)
        pred_masks = P.transpose(pred_masks, perm=[2, 0, 1])

        # Binary cross-entropy; a tiny constant guards against NaN.
        masks_pos_loss = mask_t * (0 - P.log(pred_masks + 1e-9))
        masks_neg_loss = (1 - mask_t) * (0 - P.log(1 - pred_masks + 1e-9))
        pre_loss = masks_pos_loss + masks_neg_loss
        pre_loss = P.reduce_sum(pre_loss, dim=[1, 2])

        # The smaller the gt area, the larger the mask-loss weight.
        cur_cxcywh = center_size(cur_x0y0x1y1)
        gt_box_width = cur_cxcywh[:, 2]
        gt_box_height = cur_cxcywh[:, 3]
        pre_loss = pre_loss / (gt_box_width * gt_box_height)
        loss_m += P.reduce_sum(pre_loss)

        if use_maskiou:
            # In the reference implementation, masks in mask_t with area
            # <= 5*5 are discarded (discard_mask_area = 5*5).
            '''
            GPU paddlepaddle 1.6.2 has an issue: if `select` is [None] and is
            used in gather(), it fails with
                cudaGetLastError invalid configuration argument errno: 9
            (the CPU version runs fine). To avoid this, `select` must not be
            [None], so no area filtering is done here and all of mask_t is
            kept.
            '''
            discard_mask_area = -1
            gt_mask_area = P.reduce_sum(mask_t, dim=[1, 2])
            gt_mask_area.stop_gradient = True
            select = P.where(gt_mask_area > discard_mask_area)
            select.stop_gradient = True
            pred_masks = P.gather(pred_masks, select)
            mask_t = P.gather(mask_t, select)
            label_t = P.gather(label_t, select)
            label_t.stop_gradient = True

            maskiou_net_input = P.reshape(
                pred_masks, (P.shape(pred_masks)[0], 1, mask_h, mask_w))
            pred_masks = P.cast(pred_masks > 0.5, 'float32')  # binarize at 0.5
            maskiou_t = self._mask_iou(pred_masks, mask_t)  # (8, )

            maskiou_net_input_list.append(maskiou_net_input)  # (8, 1, 138, 138)
            maskiou_t_list.append(maskiou_t)  # (8, )
            label_t_list.append(label_t)      # (8, )
    mask_alpha = 6.125
    losses['M'] = loss_m * mask_alpha / mask_h / mask_w

    # Remaining maskiou bookkeeping.
    if use_maskiou:
        # (21, 1, 138, 138): masks predicted for the 21 positives.
        maskiou_net_input = P.concat(maskiou_net_input_list, axis=0)
        # (21, ): IoU between each positive's predicted and gt mask.
        maskiou_t = P.concat(maskiou_t_list, axis=0)
        # (21, ): class ids of the 21 positives.
        label_t = P.concat(label_t_list, axis=0)
        label_t.stop_gradient = True  # integers, hence no gradient(?)
        maskiou_targets = [maskiou_net_input, maskiou_t, label_t]

    # 3. conf loss.
    conf_alpha = 1.0
    if use_ce_loss:
        conf_loss = self.ce_conf_loss(pred_allboxes_conf, labels_pos_mask,
                                      labels_neg_mask, class_vectors,
                                      labels_pos_cid2, gt_area)
    elif use_ghm_c_loss:
        conf_loss = self.ghm_c_loss(pred_allboxes_conf, labels_pos_mask,
                                    labels_neg_mask, class_vectors,
                                    labels_pos_cid2)
    elif use_focal_loss:
        conf_loss = self.focal_conf_loss(pred_allboxes_conf, labels_pos_mask,
                                         labels_neg_mask, class_vectors,
                                         labels_pos_cid2)
    elif use_ohem_loss:
        conf_loss = self.ohem_conf_loss(pred_allboxes_conf, batch_size,
                                        labels_neg_mask, labels_pos_mask,
                                        labels_pos_index, class_vectors,
                                        labels_pos_cid)
    losses['C'] = conf_loss * conf_alpha

    # 4. mask-iou loss (positives only).
    if use_maskiou:
        maskiou_net_input, maskiou_t, label_t = maskiou_targets
        maskiou_p = maskiou_net(maskiou_net_input, self.num_classes - 1)
        maskiou_p = P.reduce_max(maskiou_p, dim=[2, 3])  # max pool, (21, 80)
        temp_mask = P.gather(class_vectors, label_t)  # one-hot mask, (21, 81)
        temp_mask = temp_mask[:, 1:]  # drop background, (21, 80)
        # Keep only the channel of the true class. (21, 80)
        maskiou_p = temp_mask * maskiou_p
        maskiou_p = P.reduce_sum(maskiou_p, dim=1, keep_dim=True)  # (21, 1)
        loss_i = P.smooth_l1(
            maskiou_p, P.reshape(maskiou_t, (P.shape(maskiou_t)[0], 1)))
        maskiou_alpha = 25.0
        losses['I'] = maskiou_alpha * P.reduce_sum(loss_i)

    # 5. semantic segmentation loss (positives only).
    mask_h = P.shape(pred_segm)[2]
    mask_w = P.shape(pred_segm)[3]
    loss_s = 0.0
    for idx in range(batch_size):
        cur_segment = pred_segm[idx]  # (80, 69, 69)
        l = P.sigmoid_cross_entropy_with_logits(cur_segment, segment_t[idx])
        loss_s += P.reduce_sum(l)
    semantic_segmentation_alpha = 1.0
    losses['S'] = loss_s / mask_h / mask_w * semantic_segmentation_alpha

    total_num_pos = P.cast(P.reduce_sum(labels_pos_mask), 'float32')
    for k in losses:
        if k not in ('S', ):
            losses[k] /= total_num_pos
        else:
            losses[k] /= batch_size
    total_loss = 0.0
    for k in losses:
        total_loss += losses[k]

    # Loss keys:
    #  - B: box localization loss
    #  - M: mask loss
    #  - C: class confidence loss
    #  - I: mask IoU loss
    #  - S: semantic segmentation loss
    return losses, total_loss
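# A toy check of the final normalization above (all numbers are made up):
# box/mask/conf/iou losses are averaged over the count of positive anchors,
# while the semantic segmentation loss is averaged over the batch.
losses = {'B': 30.0, 'M': 12.0, 'C': 45.0, 'I': 5.0, 'S': 8.0}
total_num_pos, batch_size = 15.0, 2
normed = {k: v / (batch_size if k == 'S' else total_num_pos)
          for k, v in losses.items()}
total_loss = sum(normed.values())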