def create_model(args, ernie_config):
    """Build the feed variables and an ERNIE forward pass for inference export.

    Returns:
        (inputs, outputs): the list of five input variables and the pair
        [sequence_output, pooled_classification_output].
    """
    token_shape = [-1, args.max_seq_len, 1]
    specs = [
        ("src_ids", token_shape, 'int64'),
        ("sent_ids", token_shape, 'int64'),
        ("pos_ids", token_shape, 'int64'),
        ("task_ids", token_shape, 'int64'),
        ("input_mask", token_shape, 'float32'),
    ]
    inputs = [fluid.data(name, shape, dtype=dtype) for name, shape, dtype in specs]
    src_ids, sent_ids, pos_ids, task_ids, input_mask = inputs

    ernie = ErnieModel(
        src_ids=src_ids,
        position_ids=pos_ids,
        sentence_ids=sent_ids,
        task_ids=task_ids,
        input_mask=input_mask,
        config=ernie_config,
        use_fp16=args.use_fp16)

    # Identity scales exist only to give the latent outputs readable names;
    # save_inference_model would otherwise emit opaque ones such as
    # 'save_infer_model/scale_1'.
    seq_out = fluid.layers.scale(
        ernie.get_sequence_output(), scale=1.0, name='ernie_sequence_latent')
    cls_feats = fluid.layers.scale(
        ernie.get_pooled_output(), scale=1.0, name='ernie_classification')

    for idx, var in enumerate(inputs):
        print(f'input[{idx}]:', var.name, var.shape, var.dtype)
    print('sequence_output :', seq_out.name, seq_out.shape, seq_out.dtype)
    print('classifier_output:', cls_feats.name, cls_feats.shape, cls_feats.dtype)
    return inputs, [seq_out, cls_feats]
def cls_from_ernie(
        args,
        src_ids,
        position_ids,
        sentence_ids,
        task_ids,
        input_mask,
        config,
        use_fp16,
):
    """Run ERNIE and return its pooled [CLS] feature with dropout applied."""
    model = ErnieModel(
        src_ids=src_ids,
        position_ids=position_ids,
        sentence_ids=sentence_ids,
        task_ids=task_ids,
        input_mask=input_mask,
        config=config,
        use_fp16=use_fp16,
    )
    pooled = model.get_pooled_output()
    # "upscale_in_train" rescales activations during training so that
    # inference is a pure identity pass-through.
    return fluid.layers.dropout(
        x=pooled,
        dropout_prob=0.1,
        dropout_implementation="upscale_in_train",
    )
def create_model(args, pyreader_name, ernie_config):
    """Build a py_reader-fed ERNIE graph exposing sentence embeddings.

    Returns:
        (pyreader, graph_vars): graph_vars maps "cls_embeddings" to the
        pooled [CLS] feature and "top_layer_embeddings" to the unpadded
        top-layer token embeddings.
    """
    pyreader = fluid.layers.py_reader(
        capacity=50,
        shapes=[[-1, args.max_seq_len, 1], [-1, args.max_seq_len, 1],
                [-1, args.max_seq_len, 1], [-1, args.max_seq_len, 1],
                [-1, 1]],
        # FIX: was 'float', which numpy resolves to float64; the mask must be
        # 'float32' to match the other create_model variants in this file.
        dtypes=['int64', 'int64', 'int64', 'float32', 'int64'],
        lod_levels=[0, 0, 0, 0, 0],
        name=pyreader_name,
        use_double_buffer=True)
    (src_ids, sent_ids, pos_ids, input_mask,
     seq_lens) = fluid.layers.read_file(pyreader)
    ernie = ErnieModel(
        src_ids=src_ids,
        position_ids=pos_ids,
        sentence_ids=sent_ids,
        input_mask=input_mask,
        config=ernie_config)
    enc_out = ernie.get_sequence_output()
    # Strip the padding positions so consumers see real-length sequences only.
    unpad_enc_out = fluid.layers.sequence_unpad(enc_out, length=seq_lens)
    cls_feats = ernie.get_pooled_output()
    # set persistable = True to avoid memory optimizing
    enc_out.persistable = True
    unpad_enc_out.persistable = True
    cls_feats.persistable = True
    graph_vars = {
        "cls_embeddings": cls_feats,
        "top_layer_embeddings": unpad_enc_out,
    }
    return pyreader, graph_vars
def forward(self, features):
    """Build the classification forward pass from (src_ids, sent_ids).

    Derives the attention mask, position ids and task ids on the fly, runs
    ERNIE, and returns softmax probabilities in PREDICT mode, raw logits
    otherwise.
    """
    src_ids, sent_ids = features
    dtype = 'float16' if self.hparam['fp16'] else 'float32'
    zero = L.fill_constant([1], dtype='int64', value=0)
    # Mask is 1 wherever the token id is non-zero.
    input_mask = L.cast(L.logical_not(L.equal(src_ids, zero)), dtype)  # assume pad id == 0
    #input_mask = L.unsqueeze(input_mask, axes=[2])
    d_shape = L.shape(src_ids)
    seqlen = d_shape[1]
    batch_size = d_shape[0]
    # Position ids 0..seqlen-1, tiled per batch row, shaped [batch, seqlen, 1].
    pos_ids = L.unsqueeze(L.range(0, seqlen, 1, dtype='int32'), axes=[0])
    pos_ids = L.expand(pos_ids, [batch_size, 1])
    pos_ids = L.unsqueeze(pos_ids, axes=[2])
    pos_ids = L.cast(pos_ids, 'int64')
    pos_ids.stop_gradient = True
    input_mask.stop_gradient = True
    # Constant task id everywhere; not meaningfully used at the moment.
    task_ids = L.zeros_like(src_ids) + self.hparam.task_id
    task_ids.stop_gradient = True
    bert = ErnieModel(
        src_ids=src_ids,
        position_ids=pos_ids,
        sentence_ids=sent_ids,
        task_ids=task_ids,
        input_mask=input_mask,
        config=self.hparam,
        use_fp16=self.hparam['fp16']
    )
    cls_feats = bert.get_pooled_output()
    cls_feats = L.dropout(
        x=cls_feats,
        dropout_prob=0.1,
        dropout_implementation="upscale_in_train"
    )
    logits = L.fc(
        input=cls_feats,
        size=self.hparam['num_label'],
        param_attr=F.ParamAttr(
            name="cls_out_w",
            initializer=F.initializer.TruncatedNormal(scale=0.02)),
        bias_attr=F.ParamAttr(
            name="cls_out_b",
            initializer=F.initializer.Constant(0.))
    )
    propeller.summary.histogram('pred', logits)
    if self.mode is propeller.RunMode.PREDICT:
        probs = L.softmax(logits)
        return probs
    else:
        return logits
def _model(is_noise=False):
    """Closure: build the ERNIE classifier graph and its training metrics.

    Reads src_ids, pos_ids, sent_ids, task_ids, input_mask, labels, qids,
    task_name, args and ernie_config from the enclosing scope.

    Args:
        is_noise: forwarded to ErnieModel; when True, dropout on the pooled
            feature is skipped.

    Returns:
        dict of graph variables: loss, probs, accuracy, labels, num_seqs, qids.
    """
    ernie = ErnieModel(
        src_ids=src_ids,
        position_ids=pos_ids,
        sentence_ids=sent_ids,
        task_ids=task_ids,
        input_mask=input_mask,
        config=ernie_config,
        is_noise=is_noise)
    cls_feats = ernie.get_pooled_output()
    if not is_noise:
        cls_feats = fluid.layers.dropout(
            x=cls_feats,
            dropout_prob=0.1,
            dropout_implementation="upscale_in_train")
    logits = fluid.layers.fc(
        input=cls_feats,
        size=args.num_labels,
        param_attr=fluid.ParamAttr(
            name=task_name + "_cls_out_w",
            initializer=fluid.initializer.TruncatedNormal(scale=0.02)),
        bias_attr=fluid.ParamAttr(
            name=task_name + "_cls_out_b",
            initializer=fluid.initializer.Constant(0.)))
    # Dead prediction branch kept verbatim (string statement, no effect).
    """
    if is_prediction:
        probs = fluid.layers.softmax(logits)
        feed_targets_name = [
            src_ids.name, sent_ids.name, pos_ids.name, input_mask.name
        ]
        if ernie_version == "2.0":
            feed_targets_name += [task_ids.name]
        return pyreader, probs, feed_targets_name
    """
    num_seqs = fluid.layers.create_tensor(dtype='int64')
    ## add focal loss
    # NOTE(review): despite the comment above, plain softmax cross-entropy
    # is what is actually computed here.
    ce_loss, probs = fluid.layers.softmax_with_cross_entropy(
        logits=logits, label=labels, return_softmax=True)
    loss = fluid.layers.mean(x=ce_loss)
    accuracy = fluid.layers.accuracy(input=probs, label=labels, total=num_seqs)
    graph_vars = {
        "loss": loss,
        "probs": probs,
        "accuracy": accuracy,
        "labels": labels,
        "num_seqs": num_seqs,
        "qids": qids
    }
    return graph_vars
def create_model_predict(args, ernie_config, is_prediction=False):
    """Build an ERNIE classification graph for inference.

    Args:
        args: run arguments (num_labels, use_fp16, ...).
        ernie_config: ERNIE model configuration.
        is_prediction: when True, also build a softmax head.

    Returns:
        (probs, graph_vars) when is_prediction is True;
        graph_vars (containing the raw logits) otherwise.
    """
    (src_ids, sent_ids, pos_ids, input_mask, task_ids) = make_all_inputs(args)
    ernie = ErnieModel(
        src_ids=src_ids,
        position_ids=pos_ids,
        sentence_ids=sent_ids,
        task_ids=task_ids,
        input_mask=input_mask,
        config=ernie_config,
        use_fp16=args.use_fp16)
    cls_feats = ernie.get_pooled_output()
    cls_feats = fluid.layers.dropout(
        x=cls_feats,
        dropout_prob=0.1,
        dropout_implementation="upscale_in_train")
    logits = fluid.layers.fc(
        input=cls_feats,
        size=args.num_labels,
        param_attr=fluid.ParamAttr(
            name="_cls_out_w",
            initializer=fluid.initializer.TruncatedNormal(scale=0.02)),
        bias_attr=fluid.ParamAttr(
            name="_cls_out_b",
            initializer=fluid.initializer.Constant(0.)))
    if is_prediction:
        probs = fluid.layers.softmax(logits)
        graph_vars = {
            "probs": probs,
        }
        for v in graph_vars.values():
            v.persistable = True
        return probs, graph_vars
    # BUG FIX: the original returned graph_vars here without ever defining it,
    # raising NameError whenever is_prediction was False. Expose the raw
    # logits instead so the non-prediction path returns a usable dict.
    graph_vars = {
        "logits": logits,
    }
    for v in graph_vars.values():
        v.persistable = True
    return graph_vars
def create_model(args, pyreader_name, ernie_config, is_prediction=False):
    """Build a py_reader-fed ERNIE binary/multi-class classifier.

    Returns:
        (pyreader, probs, feed_targets_name) when is_prediction is True;
        otherwise (pyreader, graph_vars) with loss/probs/accuracy/AUC and
        AUC batch statistics.
    """
    pyreader = fluid.layers.py_reader(
        capacity=50,
        shapes=[[-1, args.max_seq_len, 1], [-1, args.max_seq_len, 1],
                [-1, args.max_seq_len, 1], [-1, args.max_seq_len, 1],
                [-1, 1], [-1, 1]],
        dtypes=['int64', 'int64', 'int64', 'float32', 'int64', 'int64'],
        lod_levels=[0, 0, 0, 0, 0, 0],
        name=pyreader_name,
        use_double_buffer=True)
    (src_ids, sent_ids, pos_ids, input_mask, labels,
     qids) = fluid.layers.read_file(pyreader)
    ernie = ErnieModel(
        src_ids=src_ids,
        position_ids=pos_ids,
        sentence_ids=sent_ids,
        input_mask=input_mask,
        config=ernie_config,
        use_fp16=args.use_fp16)
    cls_feats = ernie.get_pooled_output()
    cls_feats = fluid.layers.dropout(
        x=cls_feats,
        dropout_prob=0.1,
        dropout_implementation="upscale_in_train")
    logits = fluid.layers.fc(
        input=cls_feats,
        size=args.num_labels,
        param_attr=fluid.ParamAttr(
            name="cls_out_w",
            initializer=fluid.initializer.TruncatedNormal(scale=0.02)),
        bias_attr=fluid.ParamAttr(
            name="cls_out_b",
            initializer=fluid.initializer.Constant(0.)))
    if is_prediction:
        probs = fluid.layers.softmax(logits)
        feed_targets_name = [
            src_ids.name, sent_ids.name, pos_ids.name, input_mask.name
        ]
        return pyreader, probs, feed_targets_name
    ce_loss, probs = fluid.layers.softmax_with_cross_entropy(
        logits=logits, label=labels, return_softmax=True)
    loss = fluid.layers.mean(x=ce_loss)
    # Static loss scaling to keep fp16 gradients in representable range.
    if args.use_fp16 and args.loss_scaling > 1.0:
        loss *= args.loss_scaling
    num_seqs = fluid.layers.create_tensor(dtype='int64')
    accuracy = fluid.layers.accuracy(input=probs, label=labels, total=num_seqs)
    # fluid.layers.auc returns (auc, batch_auc, [pos/neg batch + global stats]).
    auc, batch_auc, [batch_stat_pos, batch_stat_neg, stat_pos,
                     stat_neg] = fluid.layers.auc(input=probs, label=labels)
    graph_vars = {
        "loss": loss,
        "probs": probs,
        "accuracy": accuracy,
        "labels": labels,
        "num_seqs": num_seqs,
        "qids": qids,
        "auc": auc,
        "batch_auc": batch_auc,
        "batch_stat_pos": batch_stat_pos,
        "batch_stat_neg": batch_stat_neg,
        "stat_pos": stat_pos,
        "stat_neg": stat_neg
    }
    # Keep outputs alive across memory optimization passes.
    for k, v in graph_vars.items():
        v.persistable = True
    return pyreader, graph_vars
def create_model(args, pyreader_name, ernie_config, is_prediction=False,
                 task_name="", is_classify=False, is_regression=False,
                 ernie_version="1.0"):
    """Build an ERNIE classify/regression graph fed by a DataLoader.

    Exactly one of is_classify / is_regression must be True; it selects the
    label dtype and the loss (softmax cross-entropy vs. squared error).

    Returns:
        (pyreader, probs, feed_targets_name) when is_prediction is True;
        otherwise (pyreader, graph_vars).
    """
    # BUG FIX: validate the mode flags before they are used. The original
    # asserted only after the graph was built, so passing neither flag
    # crashed with a NameError on `labels` instead of this message.
    assert is_classify != is_regression, 'is_classify or is_regression must be true and only one of them can be true'
    src_ids = fluid.layers.data(name='eval_placeholder_0',
                                shape=[-1, args.max_seq_len, 1],
                                dtype='int64')
    sent_ids = fluid.layers.data(name='eval_placeholder_1',
                                 shape=[-1, args.max_seq_len, 1],
                                 dtype='int64')
    pos_ids = fluid.layers.data(name='eval_placeholder_2',
                                shape=[-1, args.max_seq_len, 1],
                                dtype='int64')
    input_mask = fluid.layers.data(name='eval_placeholder_3',
                                   shape=[-1, args.max_seq_len, 1],
                                   dtype='float32')
    task_ids = fluid.layers.data(name='eval_placeholder_4',
                                 shape=[-1, args.max_seq_len, 1],
                                 dtype='int64')
    qids = fluid.layers.data(name='eval_placeholder_5',
                             shape=[-1, 1],
                             dtype='int64')
    # Label dtype depends on the task: int64 class ids vs. float32 targets.
    if is_classify:
        labels = fluid.layers.data(name='6', shape=[-1, 1], dtype='int64')
    elif is_regression:
        labels = fluid.layers.data(name='6', shape=[-1, 1], dtype='float32')
    pyreader = fluid.io.DataLoader.from_generator(feed_list=[
        src_ids, sent_ids, pos_ids, task_ids, input_mask, labels, qids
    ],
                                                  capacity=70,
                                                  iterable=False)
    ernie = ErnieModel(src_ids=src_ids,
                       position_ids=pos_ids,
                       sentence_ids=sent_ids,
                       task_ids=task_ids,
                       input_mask=input_mask,
                       config=ernie_config,
                       use_fp16=args.use_fp16)
    cls_feats = ernie.get_pooled_output()
    cls_feats = fluid.layers.dropout(x=cls_feats,
                                     dropout_prob=0.1,
                                     dropout_implementation="upscale_in_train")
    logits = fluid.layers.fc(
        input=cls_feats,
        size=args.num_labels,
        param_attr=fluid.ParamAttr(
            name=task_name + "_cls_out_w",
            initializer=fluid.initializer.TruncatedNormal(scale=0.02)),
        bias_attr=fluid.ParamAttr(name=task_name + "_cls_out_b",
                                  initializer=fluid.initializer.Constant(0.)))
    if is_prediction:
        if is_classify:
            probs = fluid.layers.softmax(logits)
        else:
            probs = logits
        feed_targets_name = [
            src_ids.name, sent_ids.name, pos_ids.name, input_mask.name
        ]
        # ERNIE 2.0 models additionally consume the task ids.
        if ernie_version == "2.0":
            feed_targets_name += [task_ids.name]
        return pyreader, probs, feed_targets_name
    num_seqs = fluid.layers.create_tensor(dtype='int64')
    if is_classify:
        ce_loss, probs = fluid.layers.softmax_with_cross_entropy(
            logits=logits, label=labels, return_softmax=True)
        loss = fluid.layers.mean(x=ce_loss)
        accuracy = fluid.layers.accuracy(input=probs,
                                         label=labels,
                                         total=num_seqs)
        graph_vars = {
            "loss": loss,
            "probs": probs,
            "accuracy": accuracy,
            "labels": labels,
            "num_seqs": num_seqs,
            "qids": qids,
            "logits": logits  # add for middle state
        }
    elif is_regression:
        cost = fluid.layers.square_error_cost(input=logits, label=labels)
        loss = fluid.layers.mean(x=cost)
        graph_vars = {
            "loss": loss,
            "probs": logits,
            "labels": labels,
            "num_seqs": num_seqs,
            "qids": qids
        }
    else:
        raise ValueError(
            'unsupported fine tune mode. only supported classify/regression')
    return pyreader, graph_vars
def create_model(args, pyreader_name, ernie_config, is_prediction=False,
                 task_name="", is_classify=False, is_regression=False,
                 ernie_version="1.0"):
    """Create the ERNIE fine-tuning model and its py_reader input pipeline.

    Exactly one of is_classify / is_regression must be True; it selects the
    reader's label dtype and the loss function.

    Returns:
        (pyreader, probs, feed_targets_name) when is_prediction is True;
        otherwise (pyreader, graph_vars).
    """
    # BUG FIX: validate the mode flags first. The original checked them only
    # after the graph was built, so passing neither flag crashed with a
    # NameError on `pyreader` instead of this message.
    assert is_classify != is_regression, 'is_classify or is_regression must be true and only one of them can be true'
    if is_classify:
        pyreader = fluid.layers.py_reader(
            capacity=50,
            shapes=[[-1, args.max_seq_len, 1], [-1, args.max_seq_len, 1],
                    [-1, args.max_seq_len, 1], [-1, args.max_seq_len, 1],
                    [-1, args.max_seq_len, 1], [-1, 1], [-1, 1]],
            dtypes=[
                'int64', 'int64', 'int64', 'int64', 'float32', 'int64',
                'int64'
            ],
            lod_levels=[0, 0, 0, 0, 0, 0, 0],
            name=task_name + "_" + pyreader_name,
            use_double_buffer=True)
    elif is_regression:
        pyreader = fluid.layers.py_reader(
            capacity=50,
            shapes=[[-1, args.max_seq_len, 1], [-1, args.max_seq_len, 1],
                    [-1, args.max_seq_len, 1], [-1, args.max_seq_len, 1],
                    [-1, args.max_seq_len, 1], [-1, 1], [-1, 1]],
            dtypes=[
                'int64', 'int64', 'int64', 'int64', 'float32', 'float32',
                'int64'
            ],
            lod_levels=[0, 0, 0, 0, 0, 0, 0],
            name=task_name + "_" + pyreader_name,
            use_double_buffer=True)
    (src_ids, sent_ids, pos_ids, task_ids, input_mask, labels,
     qids) = fluid.layers.read_file(pyreader)
    ernie = ErnieModel(src_ids=src_ids,
                       position_ids=pos_ids,
                       sentence_ids=sent_ids,
                       task_ids=task_ids,
                       input_mask=input_mask,
                       config=ernie_config,
                       use_fp16=args.use_fp16)
    cls_feats = ernie.get_pooled_output()
    cls_feats = fluid.layers.dropout(x=cls_feats,
                                     dropout_prob=0.1,
                                     dropout_implementation="upscale_in_train")
    logits = fluid.layers.fc(
        input=cls_feats,
        size=args.num_labels,
        param_attr=fluid.ParamAttr(
            name=task_name + "_cls_out_w",
            initializer=fluid.initializer.TruncatedNormal(scale=0.02)),
        bias_attr=fluid.ParamAttr(name=task_name + "_cls_out_b",
                                  initializer=fluid.initializer.Constant(0.)))
    if is_prediction:
        probs = fluid.layers.softmax(logits)
        feed_targets_name = [
            src_ids.name, sent_ids.name, pos_ids.name, input_mask.name
        ]
        # ERNIE 2.0 models additionally consume the task ids.
        if ernie_version == "2.0":
            feed_targets_name += [task_ids.name]
        return pyreader, probs, feed_targets_name
    num_seqs = fluid.layers.create_tensor(dtype='int64')
    if is_classify:
        ce_loss, probs = fluid.layers.softmax_with_cross_entropy(
            logits=logits, label=labels, return_softmax=True)
        loss = fluid.layers.mean(x=ce_loss)
        accuracy = fluid.layers.accuracy(input=probs,
                                         label=labels,
                                         total=num_seqs)
        graph_vars = {
            "loss": loss,
            "probs": probs,
            "accuracy": accuracy,
            "labels": labels,
            "num_seqs": num_seqs,
            "qids": qids
        }
    elif is_regression:
        cost = fluid.layers.square_error_cost(input=logits, label=labels)
        loss = fluid.layers.mean(x=cost)
        graph_vars = {
            "loss": loss,
            "probs": logits,
            "labels": labels,
            "num_seqs": num_seqs,
            "qids": qids
        }
    else:
        raise ValueError(
            'unsupported fine tune mode. only supported classify/regression')
    return pyreader, graph_vars
def create_model(args, pyreader_name, ernie_config, is_prediction=False,
                 task_name="", is_classify=False, is_regression=False,
                 ernie_version="1.0"):
    """Build an ERNIE + GAT classifier: token embeddings flow through a graph
    attention network over a dependency adjacency matrix, and the head-word
    representation is concatenated with [CLS] before the output layer.

    Returns:
        (pyreader, probs, feed_targets_name) when is_prediction is True;
        otherwise (pyreader, graph_vars).
    """
    if is_classify:
        # Reader additionally carries the adjacency matrix and head-word ids.
        pyreader = fluid.layers.py_reader(
            capacity=50,
            shapes=[[-1, args.max_seq_len, 1], [-1, args.max_seq_len, 1],
                    [-1, args.max_seq_len, 1], [-1, args.max_seq_len, 1],
                    [-1, args.max_seq_len, 1], [-1, 1], [-1, 1],
                    [-1, args.max_seq_len, args.max_seq_len], [-1, 2]],
            dtypes=[
                'int64', 'int64', 'int64', 'int64', 'float32', 'int64',
                'int64', 'int64', 'int64'
            ],
            lod_levels=[0, 0, 0, 0, 0, 0, 0, 0, 0],
            name=task_name + "_" + pyreader_name,
            use_double_buffer=True)
    elif is_regression:
        # NOTE(review): this reader yields only 7 outputs, but the 9-way
        # unpack below also expects adj_mat and head_ids — the regression
        # path as written would fail at read_file; verify before using it.
        pyreader = fluid.layers.py_reader(
            capacity=50,
            shapes=[[-1, args.max_seq_len, 1], [-1, args.max_seq_len, 1],
                    [-1, args.max_seq_len, 1], [-1, args.max_seq_len, 1],
                    [-1, args.max_seq_len, 1], [-1, 1], [-1, 1]],
            dtypes=['int64', 'int64', 'int64', 'int64', 'float32', 'float32',
                    'int64'],
            lod_levels=[0, 0, 0, 0, 0, 0, 0],
            name=task_name + "_" + pyreader_name,
            use_double_buffer=True)
    (src_ids, sent_ids, pos_ids, task_ids, input_mask, labels, qids, adj_mat,
     head_ids) = fluid.layers.read_file(pyreader)
    ernie = ErnieModel(src_ids=src_ids,
                       position_ids=pos_ids,
                       sentence_ids=sent_ids,
                       task_ids=task_ids,
                       input_mask=input_mask,
                       config=ernie_config,
                       use_fp16=args.use_fp16)
    erinie_output = ernie.get_sequence_output()
    cls_feats = ernie.get_pooled_output()
    # Build the GAT network.
    gat = gnn.GAT(input_size=768,
                  hidden_size=100,
                  output_size=50,
                  dropout=0.0,
                  alpha=0.1,
                  heads=12,
                  layer=2)
    # Feed the ERNIE representations and the adjacency matrix through GAT
    # to obtain representations carrying the sentence structure.
    gat_emb = gat.forward(erinie_output, adj_mat)
    # Extract the head-word representation.
    gat_emb = utils.index_sample(gat_emb, head_ids)
    # Concatenate [CLS] with the head-word representation for downstream use.
    cls_feats = fluid.layers.concat([cls_feats, gat_emb], axis=1)
    cls_feats = fluid.layers.dropout(x=cls_feats,
                                     dropout_prob=0.1,
                                     dropout_implementation="upscale_in_train")
    logits = fluid.layers.fc(
        input=cls_feats,
        size=args.num_labels,
        param_attr=fluid.ParamAttr(
            name=task_name + "_cls_out_w",
            initializer=fluid.initializer.TruncatedNormal(scale=0.02)),
        bias_attr=fluid.ParamAttr(name=task_name + "_cls_out_b",
                                  initializer=fluid.initializer.Constant(0.)))
    if is_prediction:
        probs = fluid.layers.softmax(logits)
        feed_targets_name = [
            src_ids.name, sent_ids.name, pos_ids.name, input_mask.name
        ]
        if ernie_version == "2.0":
            feed_targets_name += [task_ids.name]
        return pyreader, probs, feed_targets_name
    assert is_classify != is_regression, 'is_classify or is_regression must be true and only one of them can be true'
    num_seqs = fluid.layers.create_tensor(dtype='int64')
    if is_classify:
        ce_loss, probs = fluid.layers.softmax_with_cross_entropy(
            logits=logits, label=labels, return_softmax=True)
        loss = fluid.layers.mean(x=ce_loss)
        accuracy = fluid.layers.accuracy(input=probs,
                                         label=labels,
                                         total=num_seqs)
        graph_vars = {
            "loss": loss,
            "probs": probs,
            "accuracy": accuracy,
            "labels": labels,
            "num_seqs": num_seqs,
            "qids": qids
        }
    elif is_regression:
        cost = fluid.layers.square_error_cost(input=logits, label=labels)
        loss = fluid.layers.mean(x=cost)
        graph_vars = {
            "loss": loss,
            "probs": logits,
            "labels": labels,
            "num_seqs": num_seqs,
            "qids": qids
        }
    else:
        raise ValueError(
            'unsupported fine tune mode. only supported classify/regression')
    return pyreader, graph_vars