def cls_from_ernie(
        args,
        src_ids,
        position_ids,
        sentence_ids,
        task_ids,
        input_mask,
        config,
        use_fp16,
):
    """Run an ERNIE encoder and return its dropout-regularized [CLS] feature."""
    encoder = ErnieModel(
        src_ids=src_ids,
        position_ids=position_ids,
        sentence_ids=sentence_ids,
        task_ids=task_ids,
        input_mask=input_mask,
        config=config,
        use_fp16=use_fp16,
    )
    pooled = encoder.get_pooled_output()
    # upscale_in_train leaves inference-time activations unscaled
    return fluid.layers.dropout(
        x=pooled,
        dropout_prob=0.1,
        dropout_implementation="upscale_in_train",
    )
def create_model(args, pyreader_name, ernie_config):
    """Build the ERNIE embedding-extraction graph fed by a py_reader.

    Returns (pyreader, graph_vars) where graph_vars exposes the pooled
    [CLS] embedding and the unpadded top-layer token embeddings.
    """
    tok_shape = [-1, args.max_seq_len, 1]
    pyreader = fluid.layers.py_reader(
        capacity=50,
        shapes=[tok_shape, tok_shape, tok_shape, tok_shape, [-1, 1]],
        dtypes=['int64', 'int64', 'int64', 'float', 'int64'],
        lod_levels=[0, 0, 0, 0, 0],
        name=pyreader_name,
        use_double_buffer=True)
    src_ids, sent_ids, pos_ids, input_mask, seq_lens = \
        fluid.layers.read_file(pyreader)

    encoder = ErnieModel(
        src_ids=src_ids,
        position_ids=pos_ids,
        sentence_ids=sent_ids,
        input_mask=input_mask,
        config=ernie_config)

    enc_out = encoder.get_sequence_output()
    # strip padding so consumers see true-length token sequences
    unpad_enc_out = fluid.layers.sequence_unpad(enc_out, length=seq_lens)
    cls_feats = encoder.get_pooled_output()

    # set persistable = True to avoid memory optimizing
    for var in (enc_out, unpad_enc_out, cls_feats):
        var.persistable = True

    graph_vars = {
        "cls_embeddings": cls_feats,
        "top_layer_embeddings": unpad_enc_out,
    }
    return pyreader, graph_vars
def create_model(args, ernie_config):
    """Declare feed placeholders, run ERNIE, and name its latent outputs.

    Returns (inputs, [seq_out, cls_feats]), suitable for inference-model
    export with stable output names.
    """
    tok_shape = [-1, args.max_seq_len, 1]
    feed_specs = [
        ("src_ids", tok_shape, 'int64'),
        ("sent_ids", tok_shape, 'int64'),
        ("pos_ids", tok_shape, 'int64'),
        ("task_ids", tok_shape, 'int64'),
        ("input_mask", tok_shape, 'float32'),
    ]
    inputs = [fluid.data(n, s, dtype=d) for n, s, d in feed_specs]
    (src_ids, sent_ids, pos_ids, task_ids, input_mask) = inputs

    encoder = ErnieModel(
        src_ids=src_ids,
        position_ids=pos_ids,
        sentence_ids=sent_ids,
        task_ids=task_ids,
        input_mask=input_mask,
        config=ernie_config,
        use_fp16=args.use_fp16)

    seq_out = encoder.get_sequence_output()
    cls_feats = encoder.get_pooled_output()

    # dummy scale layers give the latents stable names; save_inference_model
    # otherwise emits opaque names like 'save_infer_model/scale_1'
    seq_out = fluid.layers.scale(seq_out, scale=1.0,
                                 name='ernie_sequence_latent')
    cls_feats = fluid.layers.scale(cls_feats, scale=1.0,
                                   name='ernie_classification')

    for i, inp in enumerate(inputs):
        print(f'input[{i}]:', inp.name, inp.shape, inp.dtype)
    print('sequence_output :', seq_out.name, seq_out.shape, seq_out.dtype)
    print('classifier_output:', cls_feats.name, cls_feats.shape,
          cls_feats.dtype)

    return inputs, [seq_out, cls_feats]
def create_model(pyreader_name, ernie_config):
    """Build the ERNIE pretraining graph (masked LM + next-sentence tasks).

    Relies on the module-level ``args``.  Returns
    (pyreader, next_sent_acc, mask_lm_loss, total_loss).
    """
    tok_shape = [-1, args.max_seq_len, 1]
    pyreader = fluid.layers.py_reader(
        capacity=70,
        shapes=[tok_shape, tok_shape, tok_shape,
                [-1, args.max_seq_len, args.max_seq_len],
                [-1, 1], [-1, 1], [-1, 1]],
        dtypes=['int64', 'int64', 'int64', 'float32',
                'int64', 'int64', 'int64'],
        lod_levels=[0] * 7,
        name=pyreader_name,
        use_double_buffer=True)
    (src_ids, pos_ids, sent_ids, input_mask,
     mask_label, mask_pos, labels) = fluid.layers.read_file(pyreader)

    encoder = ErnieModel(
        src_ids=src_ids,
        position_ids=pos_ids,
        sentence_ids=sent_ids,
        input_mask=input_mask,
        config=ernie_config,
        weight_sharing=args.weight_sharing,
        use_fp16=args.use_fp16)

    next_sent_acc, mask_lm_loss, total_loss = encoder.get_pretraining_output(
        mask_label, mask_pos, labels, args.next_sen_coef)

    # scale the loss so fp16 gradients stay representable
    if args.use_fp16 and args.loss_scaling > 1.0:
        total_loss *= args.loss_scaling

    return pyreader, next_sent_acc, mask_lm_loss, total_loss
def create_model(args, pyreader_name, ernie_config, is_prediction=False):
    """Sequence-labeling graph: ERNIE encoder plus per-token softmax head.

    Returns (pyreader, graph_vars) with loss, probs, flattened
    labels/inferences and sequence lengths; every graph var is marked
    persistable.
    """
    tok_shape = [-1, args.max_seq_len, 1]
    pyreader = fluid.layers.py_reader(
        capacity=50,
        shapes=[tok_shape, tok_shape, tok_shape,
                [-1, args.max_seq_len, args.max_seq_len],
                tok_shape, [-1, 1]],
        dtypes=['int64', 'int64', 'int64', 'float', 'int64', 'int64'],
        lod_levels=[0] * 6,
        name=pyreader_name,
        use_double_buffer=True)
    (src_ids, sent_ids, pos_ids, self_attn_mask, labels,
     seq_lens) = fluid.layers.read_file(pyreader)

    encoder = ErnieModel(
        src_ids=src_ids,
        position_ids=pos_ids,
        sentence_ids=sent_ids,
        self_attn_mask=self_attn_mask,
        config=ernie_config,
        use_fp16=args.use_fp16)

    enc_out = encoder.get_sequence_output()
    logits = fluid.layers.fc(
        input=enc_out,
        size=args.num_labels,
        num_flatten_dims=2,
        param_attr=fluid.ParamAttr(
            name="cls_seq_label_out_w",
            initializer=fluid.initializer.TruncatedNormal(scale=0.02)),
        bias_attr=fluid.ParamAttr(
            name="cls_seq_label_out_b",
            initializer=fluid.initializer.Constant(0.)))

    # flattened copies for downstream evaluation
    ret_labels = fluid.layers.reshape(x=labels, shape=[-1, 1])
    ret_infers = fluid.layers.reshape(
        x=fluid.layers.argmax(logits, axis=2), shape=[-1, 1])

    labels = fluid.layers.flatten(labels, axis=2)
    ce_loss, probs = fluid.layers.softmax_with_cross_entropy(
        logits=fluid.layers.flatten(logits, axis=2),
        label=labels,
        return_softmax=True)
    loss = fluid.layers.mean(x=ce_loss)
    # scale the loss so fp16 gradients stay representable
    if args.use_fp16 and args.loss_scaling > 1.0:
        loss *= args.loss_scaling

    graph_vars = {
        "loss": loss,
        "probs": probs,
        "labels": ret_labels,
        "infers": ret_infers,
        "seq_lens": seq_lens,
    }
    for var in graph_vars.values():
        var.persistable = True

    return pyreader, graph_vars
def forward(self, features):
    """Score a (src_ids, sent_ids) batch with an ERNIE classifier head.

    Returns class probabilities in PREDICT mode, raw logits otherwise.
    """
    src_ids, sent_ids = features
    mask_dtype = 'float16' if self.hparam['fp16'] else 'float32'

    # pad id is assumed to be 0; mask is 1 for real tokens, 0 for padding
    pad_id = L.fill_constant([1], dtype='int64', value=0)
    input_mask = L.cast(L.logical_not(L.equal(src_ids, pad_id)), mask_dtype)
    input_mask.stop_gradient = True

    # synthesize position ids of shape [batch, seqlen, 1] on the fly
    dims = L.shape(src_ids)
    seqlen = dims[1]
    batch_size = dims[0]
    pos_ids = L.unsqueeze(L.range(0, seqlen, 1, dtype='int32'), axes=[0])
    pos_ids = L.expand(pos_ids, [batch_size, 1])
    pos_ids = L.unsqueeze(pos_ids, axes=[2])
    pos_ids = L.cast(pos_ids, 'int64')
    pos_ids.stop_gradient = True

    # constant task ids; not meaningfully used at the moment
    task_ids = L.zeros_like(src_ids) + self.hparam.task_id
    task_ids.stop_gradient = True

    encoder = ErnieModel(
        src_ids=src_ids,
        position_ids=pos_ids,
        sentence_ids=sent_ids,
        task_ids=task_ids,
        input_mask=input_mask,
        config=self.hparam,
        use_fp16=self.hparam['fp16'])

    pooled = encoder.get_pooled_output()
    pooled = L.dropout(
        x=pooled,
        dropout_prob=0.1,
        dropout_implementation="upscale_in_train")
    logits = L.fc(
        input=pooled,
        size=self.hparam['num_label'],
        param_attr=F.ParamAttr(
            name="cls_out_w",
            initializer=F.initializer.TruncatedNormal(scale=0.02)),
        bias_attr=F.ParamAttr(
            name="cls_out_b",
            initializer=F.initializer.Constant(0.)))
    propeller.summary.histogram('pred', logits)

    if self.mode is propeller.RunMode.PREDICT:
        return L.softmax(logits)
    return logits
def _model(is_noise=False):
    """Classification head over ERNIE; reads inputs from the enclosing scope.

    Returns graph_vars with loss, probs, accuracy, labels, num_seqs, qids.
    """
    encoder = ErnieModel(
        src_ids=src_ids,
        position_ids=pos_ids,
        sentence_ids=sent_ids,
        task_ids=task_ids,
        input_mask=input_mask,
        config=ernie_config,
        is_noise=is_noise)
    pooled = encoder.get_pooled_output()
    # apply dropout only on the clean (non-noise) pass
    if not is_noise:
        pooled = fluid.layers.dropout(
            x=pooled,
            dropout_prob=0.1,
            dropout_implementation="upscale_in_train")
    logits = fluid.layers.fc(
        input=pooled,
        size=args.num_labels,
        param_attr=fluid.ParamAttr(
            name=task_name + "_cls_out_w",
            initializer=fluid.initializer.TruncatedNormal(scale=0.02)),
        bias_attr=fluid.ParamAttr(
            name=task_name + "_cls_out_b",
            initializer=fluid.initializer.Constant(0.)))

    num_seqs = fluid.layers.create_tensor(dtype='int64')
    ce_loss, probs = fluid.layers.softmax_with_cross_entropy(
        logits=logits, label=labels, return_softmax=True)
    loss = fluid.layers.mean(x=ce_loss)
    accuracy = fluid.layers.accuracy(input=probs, label=labels,
                                     total=num_seqs)
    return {
        "loss": loss,
        "probs": probs,
        "accuracy": accuracy,
        "labels": labels,
        "num_seqs": num_seqs,
        "qids": qids,
    }
def forward(self, features):
    """Sequence-labeling forward pass over ERNIE.

    Args:
        features: tuple of (src_ids, sent_ids, input_seqlen).

    Returns:
        (logits, input_seqlen); logits has one score per token per label.
    """
    src_ids, sent_ids, input_seqlen = features
    zero = L.fill_constant([1], dtype='int64', value=0)
    # BUG FIX: the mask must be 1 for real tokens and 0 for padding
    # (pad id == 0).  The previous `L.equal` alone produced the inverse,
    # masking real tokens instead of pads — compare the sibling
    # classification `forward`, which uses logical_not(equal(...)).
    input_mask = L.cast(L.logical_not(L.equal(src_ids, zero)),
                        'float32')  # assume pad id == 0
    d_shape = L.shape(src_ids)
    seqlen = d_shape[1]
    batch_size = d_shape[0]
    # synthesize position ids of shape [batch, seqlen, 1]
    pos_ids = L.unsqueeze(L.range(0, seqlen, 1, dtype='int32'), axes=[0])
    pos_ids = L.expand(pos_ids, [batch_size, 1])
    pos_ids = L.unsqueeze(pos_ids, axes=[2])
    pos_ids = L.cast(pos_ids, 'int64')
    pos_ids.stop_gradient = True
    input_mask.stop_gradient = True
    # constant task ids; not meaningfully used at the moment
    task_ids = L.zeros_like(src_ids) + self.hparam.task_id
    task_ids.stop_gradient = True
    model = ErnieModel(src_ids=src_ids,
                       position_ids=pos_ids,
                       sentence_ids=sent_ids,
                       task_ids=task_ids,
                       input_mask=input_mask,
                       config=self.hparam,
                       use_fp16=self.hparam['use_fp16'])
    enc_out = model.get_sequence_output()
    logits = L.fc(
        input=enc_out,
        size=self.num_label,
        num_flatten_dims=2,
        param_attr=F.ParamAttr(
            name="cls_seq_label_out_w",
            initializer=F.initializer.TruncatedNormal(scale=0.02)),
        bias_attr=F.ParamAttr(name="cls_seq_label_out_b",
                              initializer=F.initializer.Constant(0.)))
    propeller.summary.histogram('pred', logits)
    return logits, input_seqlen
def create_model_predict(args, ernie_config, is_prediction=False):
    """Build the ERNIE classification inference graph.

    Args:
        args: config namespace (num_labels, use_fp16; max_seq_len via
            make_all_inputs).
        ernie_config: parsed ERNIE model config.
        is_prediction: when True, return the softmax tensor alongside
            graph_vars.

    Returns:
        (probs, graph_vars) when is_prediction is True, otherwise
        graph_vars alone.  graph_vars always maps "probs" to the softmax
        output.  BUG FIX: probs/graph_vars were previously only created
        inside the is_prediction branch, so the default
        is_prediction=False path crashed with a NameError on the final
        `return graph_vars`.
    """
    (src_ids, sent_ids, pos_ids, input_mask, task_ids) = make_all_inputs(args)
    ernie = ErnieModel(src_ids=src_ids,
                       position_ids=pos_ids,
                       sentence_ids=sent_ids,
                       task_ids=task_ids,
                       input_mask=input_mask,
                       config=ernie_config,
                       use_fp16=args.use_fp16)
    cls_feats = ernie.get_pooled_output()
    cls_feats = fluid.layers.dropout(
        x=cls_feats,
        dropout_prob=0.1,
        dropout_implementation="upscale_in_train")
    logits = fluid.layers.fc(
        input=cls_feats,
        size=args.num_labels,
        param_attr=fluid.ParamAttr(
            name="_cls_out_w",
            initializer=fluid.initializer.TruncatedNormal(scale=0.02)),
        bias_attr=fluid.ParamAttr(
            name="_cls_out_b",
            initializer=fluid.initializer.Constant(0.)))

    # built unconditionally so both return paths are well-defined
    probs = fluid.layers.softmax(logits)
    graph_vars = {
        "probs": probs,
    }
    # persistable so memory optimization does not recycle the outputs
    for k, v in graph_vars.items():
        v.persistable = True

    if is_prediction:
        return probs, graph_vars
    return graph_vars
def create_model(args, pyreader_name, ernie_config, is_prediction=False):
    """Build an ERNIE + CRF sequence-labeling graph.

    Feeds come from a py_reader; the ERNIE sequence output is projected to
    per-label emissions, scored with a linear-chain CRF, and decoded with
    crf_decoding.  Returns (pyreader, graph_vars); every graph var is
    marked persistable so memory optimization will not recycle it.
    """
    pyreader = fluid.layers.py_reader(
        capacity=50,
        # src_ids, sent_ids, pos_ids, task_ids, input_mask, labels, seq_lens
        shapes=[[-1, args.max_seq_len, 1], [-1, args.max_seq_len, 1],
                [-1, args.max_seq_len, 1], [-1, args.max_seq_len, 1],
                [-1, args.max_seq_len, 1], [-1, args.max_seq_len, 1], [-1]],
        dtypes=[
            'int64', 'int64', 'int64', 'int64', 'float32', 'int64', 'int64'
        ],
        lod_levels=[0, 0, 0, 0, 0, 0, 0],
        name=pyreader_name,
        use_double_buffer=True)
    (src_ids, sent_ids, pos_ids, task_ids, input_mask, labels,
     seq_lens) = fluid.layers.read_file(pyreader)
    ernie = ErnieModel(src_ids=src_ids,
                       position_ids=pos_ids,
                       sentence_ids=sent_ids,
                       task_ids=task_ids,
                       input_mask=input_mask,
                       config=ernie_config,
                       use_fp16=args.use_fp16)
    enc_out = ernie.get_sequence_output()
    # per-token emission scores for the CRF layer
    emission = fluid.layers.fc(
        input=enc_out,
        size=args.num_labels,
        param_attr=fluid.ParamAttr(
            initializer=fluid.initializer.Uniform(low=-0.1, high=0.1),
            regularizer=fluid.regularizer.L2DecayRegularizer(
                regularization_coeff=1e-4)),
        num_flatten_dims=2)
    crf_cost = fluid.layers.linear_chain_crf(
        input=emission,
        label=labels,
        param_attr=fluid.ParamAttr(name='crfw',
                                   learning_rate=args.crf_learning_rate),
        length=seq_lens)
    loss = fluid.layers.mean(x=crf_cost)
    # decoding reuses the transition parameters learned above ('crfw')
    crf_decode = fluid.layers.crf_decoding(
        input=emission,
        param_attr=fluid.ParamAttr(name='crfw'),
        length=seq_lens)
    lod_labels = fluid.layers.squeeze(labels, axes=[-1])
    num_chunk_types = (
        (args.num_labels - 1) // (len(args.chunk_scheme) - 1))  # IOB scheme
    (_, _, _, num_infer, num_label,
     num_correct) = fluid.layers.chunk_eval(input=crf_decode,
                                            label=lod_labels,
                                            chunk_scheme=args.chunk_scheme,
                                            num_chunk_types=num_chunk_types,
                                            seq_length=seq_lens)
    # NOTE(review): the block below is a disabled softmax-based alternative,
    # kept as a bare string literal (a no-op at runtime).
    """
    enc_out = fluid.layers.dropout(x=enc_out,
                                   dropout_prob=0.1,
                                   dropout_implementation="upscale_in_train")
    logits = fluid.layers.fc(
        input=enc_out,
        size=args.num_labels,
        num_flatten_dims=2,
        param_attr=fluid.ParamAttr(
            name="cls_seq_label_out_w",
            initializer=fluid.initializer.TruncatedNormal(scale=0.02)),
        bias_attr=fluid.ParamAttr(name="cls_seq_label_out_b",
                                  initializer=fluid.initializer.Constant(0.)))
    infers = fluid.layers.argmax(logits, axis=2)
    ret_infers = fluid.layers.reshape(x=infers, shape=[-1, 1])
    lod_labels = fluid.layers.sequence_unpad(labels, seq_lens)
    lod_infers = fluid.layers.sequence_unpad(infers, seq_lens)
    num_chunk_types = (
        (args.num_labels - 1) // (len(args.chunk_scheme) - 1))  # IOB配置
    (_, _, _, num_infer, num_label,
     num_correct) = fluid.layers.chunk_eval(input=lod_infers,
                                            label=lod_labels,
                                            chunk_scheme=args.chunk_scheme,
                                            num_chunk_types=num_chunk_types)
    labels = fluid.layers.flatten(labels, axis=2)
    ce_loss, probs = fluid.layers.softmax_with_cross_entropy(
        logits=fluid.layers.flatten(logits, axis=2),
        label=labels,
        return_softmax=True)
    input_mask = fluid.layers.flatten(input_mask, axis=2)
    ce_loss = ce_loss * input_mask
    loss = fluid.layers.mean(x=ce_loss)
    """
    graph_vars = {
        "inputs": src_ids,
        "loss": loss,
        "seqlen": seq_lens,
        "crf_decode": crf_decode,
        "num_infer": num_infer,
        "num_label": num_label,
        "num_correct": num_correct,
    }
    for k, v in graph_vars.items():
        v.persistable = True
    return pyreader, graph_vars
def create_model(args, pyreader_name, ernie_config, is_prediction=False):
    """Classification graph: ERNIE pooled output + FC head, with AUC metrics.

    When is_prediction is True, returns (pyreader, probs,
    feed_targets_name) for inference export; otherwise returns
    (pyreader, graph_vars) with loss/accuracy/AUC training metrics.
    """
    pyreader = fluid.layers.py_reader(
        capacity=50,
        # src_ids, sent_ids, pos_ids, input_mask, labels, qids
        shapes=[[-1, args.max_seq_len, 1], [-1, args.max_seq_len, 1],
                [-1, args.max_seq_len, 1], [-1, args.max_seq_len, 1],
                [-1, 1], [-1, 1]],
        dtypes=['int64', 'int64', 'int64', 'float32', 'int64', 'int64'],
        lod_levels=[0, 0, 0, 0, 0, 0],
        name=pyreader_name,
        use_double_buffer=True)
    (src_ids, sent_ids, pos_ids, input_mask, labels,
     qids) = fluid.layers.read_file(pyreader)
    ernie = ErnieModel(
        src_ids=src_ids,
        position_ids=pos_ids,
        sentence_ids=sent_ids,
        input_mask=input_mask,
        config=ernie_config,
        use_fp16=args.use_fp16)
    cls_feats = ernie.get_pooled_output()
    cls_feats = fluid.layers.dropout(
        x=cls_feats,
        dropout_prob=0.1,
        dropout_implementation="upscale_in_train")
    logits = fluid.layers.fc(
        input=cls_feats,
        size=args.num_labels,
        param_attr=fluid.ParamAttr(
            name="cls_out_w",
            initializer=fluid.initializer.TruncatedNormal(scale=0.02)),
        bias_attr=fluid.ParamAttr(
            name="cls_out_b",
            initializer=fluid.initializer.Constant(0.)))
    if is_prediction:
        probs = fluid.layers.softmax(logits)
        feed_targets_name = [
            src_ids.name, sent_ids.name, pos_ids.name, input_mask.name
        ]
        return pyreader, probs, feed_targets_name
    ce_loss, probs = fluid.layers.softmax_with_cross_entropy(
        logits=logits, label=labels, return_softmax=True)
    loss = fluid.layers.mean(x=ce_loss)
    # scale the loss so fp16 gradients stay representable
    if args.use_fp16 and args.loss_scaling > 1.0:
        loss *= args.loss_scaling
    num_seqs = fluid.layers.create_tensor(dtype='int64')
    accuracy = fluid.layers.accuracy(input=probs, label=labels,
                                     total=num_seqs)
    # global AUC plus batch AUC with their running stat accumulators
    auc, batch_auc, [batch_stat_pos, batch_stat_neg, stat_pos,
                     stat_neg] = fluid.layers.auc(input=probs, label=labels)
    graph_vars = {
        "loss": loss,
        "probs": probs,
        "accuracy": accuracy,
        "labels": labels,
        "num_seqs": num_seqs,
        "qids": qids,
        "auc": auc,
        "batch_auc": batch_auc,
        "batch_stat_pos": batch_stat_pos,
        "batch_stat_neg": batch_stat_neg,
        "stat_pos": stat_pos,
        "stat_neg": stat_neg
    }
    # persistable so graph vars survive memory optimization passes
    for k, v in graph_vars.items():
        v.persistable = True
    return pyreader, graph_vars
def create_model(args,
                 pyreader_name,
                 ernie_config,
                 is_prediction=False,
                 task_name="",
                 is_classify=False,
                 is_regression=False,
                 ernie_version="1.0"):
    """Create the ERNIE classification/regression fine-tune graph.

    Exactly one of is_classify / is_regression must be True.  Returns
    (pyreader, probs, feed_targets_name) when is_prediction is True,
    otherwise (pyreader, graph_vars).
    """
    # BUG FIX: validate the mode before any graph construction.  This
    # assert used to sit after the pyreader branches and the prediction
    # early-return, so calling with both flags False crashed earlier with
    # a NameError on `pyreader` instead of reporting the real mistake.
    assert is_classify != is_regression, \
        'is_classify or is_regression must be true and only one of them can be true'
    if is_classify:
        pyreader = fluid.layers.py_reader(
            capacity=50,
            # src_ids, sent_ids, pos_ids, task_ids, input_mask, labels, qids
            shapes=[[-1, args.max_seq_len, 1], [-1, args.max_seq_len, 1],
                    [-1, args.max_seq_len, 1], [-1, args.max_seq_len, 1],
                    [-1, args.max_seq_len, 1], [-1, 1], [-1, 1]],
            dtypes=[
                'int64', 'int64', 'int64', 'int64', 'float32', 'int64',
                'int64'
            ],
            lod_levels=[0, 0, 0, 0, 0, 0, 0],
            name=task_name + "_" + pyreader_name,
            use_double_buffer=True)
    elif is_regression:
        # same layout as classification but with float32 labels
        pyreader = fluid.layers.py_reader(
            capacity=50,
            shapes=[[-1, args.max_seq_len, 1], [-1, args.max_seq_len, 1],
                    [-1, args.max_seq_len, 1], [-1, args.max_seq_len, 1],
                    [-1, args.max_seq_len, 1], [-1, 1], [-1, 1]],
            dtypes=[
                'int64', 'int64', 'int64', 'int64', 'float32', 'float32',
                'int64'
            ],
            lod_levels=[0, 0, 0, 0, 0, 0, 0],
            name=task_name + "_" + pyreader_name,
            use_double_buffer=True)
    (src_ids, sent_ids, pos_ids, task_ids, input_mask, labels,
     qids) = fluid.layers.read_file(pyreader)
    ernie = ErnieModel(src_ids=src_ids,
                       position_ids=pos_ids,
                       sentence_ids=sent_ids,
                       task_ids=task_ids,
                       input_mask=input_mask,
                       config=ernie_config,
                       use_fp16=args.use_fp16)
    cls_feats = ernie.get_pooled_output()
    cls_feats = fluid.layers.dropout(
        x=cls_feats,
        dropout_prob=0.1,
        dropout_implementation="upscale_in_train")
    logits = fluid.layers.fc(
        input=cls_feats,
        size=args.num_labels,
        param_attr=fluid.ParamAttr(
            name=task_name + "_cls_out_w",
            initializer=fluid.initializer.TruncatedNormal(scale=0.02)),
        bias_attr=fluid.ParamAttr(name=task_name + "_cls_out_b",
                                  initializer=fluid.initializer.Constant(0.)))
    if is_prediction:
        probs = fluid.layers.softmax(logits)
        feed_targets_name = [
            src_ids.name, sent_ids.name, pos_ids.name, input_mask.name
        ]
        # ERNIE 2.0 also feeds task ids at inference time
        if ernie_version == "2.0":
            feed_targets_name += [task_ids.name]
        return pyreader, probs, feed_targets_name

    num_seqs = fluid.layers.create_tensor(dtype='int64')
    if is_classify:
        ce_loss, probs = fluid.layers.softmax_with_cross_entropy(
            logits=logits, label=labels, return_softmax=True)
        loss = fluid.layers.mean(x=ce_loss)
        accuracy = fluid.layers.accuracy(input=probs,
                                         label=labels,
                                         total=num_seqs)
        graph_vars = {
            "loss": loss,
            "probs": probs,
            "accuracy": accuracy,
            "labels": labels,
            "num_seqs": num_seqs,
            "qids": qids
        }
    elif is_regression:
        cost = fluid.layers.square_error_cost(input=logits, label=labels)
        loss = fluid.layers.mean(x=cost)
        graph_vars = {
            "loss": loss,
            "probs": logits,
            "labels": labels,
            "num_seqs": num_seqs,
            "qids": qids
        }
    else:
        # unreachable after the assert above; kept for defensive clarity
        raise ValueError(
            'unsupported fine tune mode. only supported classify/regression')

    return pyreader, graph_vars
def create_model(args, pyreader_name, ernie_config, is_training):
    """Machine-reading-comprehension (span extraction) graph.

    Projects the ERNIE sequence output to start/end logits and averages
    the start/end cross-entropy losses.  Returns (pyreader, graph_vars);
    all graph vars are marked persistable.
    """
    pyreader = fluid.layers.py_reader(
        capacity=50,
        # src_ids, sent_ids, pos_ids, task_ids, input_mask,
        # start_positions, end_positions, unique_id
        shapes=[[-1, args.max_seq_len, 1], [-1, args.max_seq_len, 1],
                [-1, args.max_seq_len, 1], [-1, args.max_seq_len, 1],
                [-1, args.max_seq_len, 1], [-1, 1], [-1, 1], [-1, 1]],
        dtypes=[
            'int64', 'int64', 'int64', 'int64', 'float32', 'int64', 'int64',
            'int64'
        ],
        lod_levels=[0, 0, 0, 0, 0, 0, 0, 0],
        name=pyreader_name,
        use_double_buffer=True)
    (src_ids, sent_ids, pos_ids, task_ids, input_mask, start_positions,
     end_positions, unique_id) = fluid.layers.read_file(pyreader)
    ernie = ErnieModel(src_ids=src_ids,
                       position_ids=pos_ids,
                       sentence_ids=sent_ids,
                       task_ids=task_ids,
                       input_mask=input_mask,
                       config=ernie_config,
                       use_fp16=args.use_fp16)
    enc_out = ernie.get_sequence_output()
    enc_out = fluid.layers.dropout(x=enc_out,
                                   dropout_prob=0.1,
                                   dropout_implementation="upscale_in_train")
    # two logits per token: start-of-span and end-of-span scores
    logits = fluid.layers.fc(
        input=enc_out,
        size=2,
        num_flatten_dims=2,
        param_attr=fluid.ParamAttr(
            name="cls_mrc_out_w",
            initializer=fluid.initializer.TruncatedNormal(scale=0.02)),
        bias_attr=fluid.ParamAttr(name="cls_mrc_out_b",
                                  initializer=fluid.initializer.Constant(0.)))
    # [batch, seq, 2] -> [2, batch, seq] so start/end can be unstacked
    logits = fluid.layers.transpose(x=logits, perm=[2, 0, 1])
    start_logits, end_logits = fluid.layers.unstack(x=logits, axis=0)
    # count sequences in the batch by summing a batch-sized vector of ones
    batch_ones = fluid.layers.fill_constant_batch_size_like(
        input=start_logits, dtype='int64', shape=[1], value=1)
    num_seqs = fluid.layers.reduce_sum(input=batch_ones)

    def compute_loss(logits, positions):
        # mean softmax cross-entropy of the predicted position distribution
        loss = fluid.layers.softmax_with_cross_entropy(logits=logits,
                                                       label=positions)
        loss = fluid.layers.mean(x=loss)
        return loss

    start_loss = compute_loss(start_logits, start_positions)
    end_loss = compute_loss(end_logits, end_positions)
    loss = (start_loss + end_loss) / 2.0
    # scale the loss so fp16 gradients stay representable
    if args.use_fp16 and args.loss_scaling > 1.0:
        loss *= args.loss_scaling
    graph_vars = {
        "loss": loss,
        "num_seqs": num_seqs,
        "unique_id": unique_id,
        "start_logits": start_logits,
        "end_logits": end_logits
    }
    for k, v in graph_vars.items():
        v.persistable = True
    return pyreader, graph_vars
def create_model(args, pyreader_name, ernie_config, is_prediction=False):
    """Sequence-labeling graph with softmax loss and chunk evaluation.

    Returns (pyreader, graph_vars); graph_vars carries the masked
    token-level cross-entropy loss, per-token probabilities, flattened
    labels/inferences and chunk_eval counters.  All entries are marked
    persistable.
    """
    pyreader = fluid.layers.py_reader(
        capacity=50,
        # src_ids, sent_ids, pos_ids, task_ids, input_mask, labels, seq_lens
        shapes=[[-1, args.max_seq_len, 1], [-1, args.max_seq_len, 1],
                [-1, args.max_seq_len, 1], [-1, args.max_seq_len, 1],
                [-1, args.max_seq_len, 1], [-1, args.max_seq_len, 1],
                [-1, 1]],
        dtypes=[
            'int64', 'int64', 'int64', 'int64', 'float32', 'int64', 'int64'
        ],
        lod_levels=[0, 0, 0, 0, 0, 0, 0],
        name=pyreader_name,
        use_double_buffer=True)
    (src_ids, sent_ids, pos_ids, task_ids, input_mask, labels,
     seq_lens) = fluid.layers.read_file(pyreader)
    ernie = ErnieModel(src_ids=src_ids,
                       position_ids=pos_ids,
                       sentence_ids=sent_ids,
                       task_ids=task_ids,
                       input_mask=input_mask,
                       config=ernie_config,
                       use_fp16=args.use_fp16)
    enc_out = ernie.get_sequence_output()
    enc_out = fluid.layers.dropout(x=enc_out,
                                   dropout_prob=0.1,
                                   dropout_implementation="upscale_in_train")
    logits = fluid.layers.fc(
        input=enc_out,
        size=args.num_labels,
        num_flatten_dims=2,
        param_attr=fluid.ParamAttr(
            name="cls_seq_label_out_w",
            initializer=fluid.initializer.TruncatedNormal(scale=0.02)),
        bias_attr=fluid.ParamAttr(name="cls_seq_label_out_b",
                                  initializer=fluid.initializer.Constant(0.)))
    infers = fluid.layers.argmax(logits, axis=2)
    ret_labels = fluid.layers.reshape(x=labels, shape=[-1, 1])
    ret_infers = fluid.layers.reshape(x=infers, shape=[-1, 1])
    # drop padding before chunk evaluation
    lod_labels = fluid.layers.sequence_unpad(labels, seq_lens)
    lod_infers = fluid.layers.sequence_unpad(infers, seq_lens)
    (_, _, _, num_infer, num_label, num_correct) = fluid.layers.chunk_eval(
        input=lod_infers,
        label=lod_labels,
        chunk_scheme=args.chunk_scheme,
        num_chunk_types=((args.num_labels - 1) //
                         (len(args.chunk_scheme) - 1)))
    labels = fluid.layers.flatten(labels, axis=2)
    ce_loss, probs = fluid.layers.softmax_with_cross_entropy(
        logits=fluid.layers.flatten(logits, axis=2),
        label=labels,
        return_softmax=True)
    # zero out loss contributions from padding positions
    input_mask = fluid.layers.flatten(input_mask, axis=2)
    ce_loss = ce_loss * input_mask
    loss = fluid.layers.mean(x=ce_loss)
    # scale the loss so fp16 gradients stay representable
    if args.use_fp16 and args.loss_scaling > 1.0:
        loss *= args.loss_scaling
    graph_vars = {
        "loss": loss,
        "probs": probs,
        "labels": ret_labels,
        "infers": ret_infers,
        "num_infer": num_infer,
        "num_label": num_label,
        "num_correct": num_correct,
        "seq_lens": seq_lens
    }
    for k, v in graph_vars.items():
        v.persistable = True
    return pyreader, graph_vars
def create_model(args,
                 pyreader_name,
                 ernie_config,
                 is_prediction=False,
                 task_name="",
                 is_classify=False,
                 is_regression=False,
                 ernie_version="1.0"):
    """ERNIE + GAT classification graph.

    The ERNIE sequence output and an adjacency matrix are fed through a
    graph attention network; the head-word representation is concatenated
    with the pooled [CLS] feature before the classifier.  Returns
    (pyreader, probs, feed_targets_name) when is_prediction is True,
    otherwise (pyreader, graph_vars).
    """
    if is_classify:
        # feed list extended with the adjacency matrix and head-word indices
        pyreader = fluid.layers.py_reader(
            capacity=50,
            shapes=[[-1, args.max_seq_len, 1], [-1, args.max_seq_len, 1],
                    [-1, args.max_seq_len, 1], [-1, args.max_seq_len, 1],
                    [-1, args.max_seq_len, 1], [-1, 1], [-1, 1],
                    [-1, args.max_seq_len, args.max_seq_len], [-1, 2]],
            dtypes=[
                'int64', 'int64', 'int64', 'int64', 'float32', 'int64',
                'int64', 'int64', 'int64'
            ],
            lod_levels=[0, 0, 0, 0, 0, 0, 0, 0, 0],
            name=task_name + "_" + pyreader_name,
            use_double_buffer=True)
    elif is_regression:
        # NOTE(review): this branch declares only 7 feeds, yet read_file
        # below unpacks 9 variables — the regression path looks broken;
        # confirm against the callers before relying on it.
        pyreader = fluid.layers.py_reader(
            capacity=50,
            shapes=[[-1, args.max_seq_len, 1], [-1, args.max_seq_len, 1],
                    [-1, args.max_seq_len, 1], [-1, args.max_seq_len, 1],
                    [-1, args.max_seq_len, 1], [-1, 1], [-1, 1]],
            dtypes=[
                'int64', 'int64', 'int64', 'int64', 'float32', 'float32',
                'int64'
            ],
            lod_levels=[0, 0, 0, 0, 0, 0, 0],
            name=task_name + "_" + pyreader_name,
            use_double_buffer=True)
    (src_ids, sent_ids, pos_ids, task_ids, input_mask, labels, qids, adj_mat,
     head_ids) = fluid.layers.read_file(pyreader)
    ernie = ErnieModel(src_ids=src_ids,
                       position_ids=pos_ids,
                       sentence_ids=sent_ids,
                       task_ids=task_ids,
                       input_mask=input_mask,
                       config=ernie_config,
                       use_fp16=args.use_fp16)
    erinie_output = ernie.get_sequence_output()
    cls_feats = ernie.get_pooled_output()
    # add the GAT network over the token representations
    gat = gnn.GAT(input_size=768,
                  hidden_size=100,
                  output_size=50,
                  dropout=0.0,
                  alpha=0.1,
                  heads=12,
                  layer=2)
    # feed ERNIE representations plus the adjacency matrix through the GAT
    # to obtain structure-aware token embeddings
    gat_emb = gat.forward(erinie_output, adj_mat)
    # pick out the head-word representation
    gat_emb = utils.index_sample(gat_emb, head_ids)
    # concatenate [CLS] with the head-word embedding for downstream layers
    cls_feats = fluid.layers.concat([cls_feats, gat_emb], axis=1)
    cls_feats = fluid.layers.dropout(
        x=cls_feats,
        dropout_prob=0.1,
        dropout_implementation="upscale_in_train")
    logits = fluid.layers.fc(
        input=cls_feats,
        size=args.num_labels,
        param_attr=fluid.ParamAttr(
            name=task_name + "_cls_out_w",
            initializer=fluid.initializer.TruncatedNormal(scale=0.02)),
        bias_attr=fluid.ParamAttr(name=task_name + "_cls_out_b",
                                  initializer=fluid.initializer.Constant(0.)))
    if is_prediction:
        probs = fluid.layers.softmax(logits)
        feed_targets_name = [
            src_ids.name, sent_ids.name, pos_ids.name, input_mask.name
        ]
        # ERNIE 2.0 also feeds task ids at inference time
        if ernie_version == "2.0":
            feed_targets_name += [task_ids.name]
        return pyreader, probs, feed_targets_name
    assert is_classify != is_regression, 'is_classify or is_regression must be true and only one of them can be true'
    num_seqs = fluid.layers.create_tensor(dtype='int64')
    if is_classify:
        ce_loss, probs = fluid.layers.softmax_with_cross_entropy(
            logits=logits, label=labels, return_softmax=True)
        loss = fluid.layers.mean(x=ce_loss)
        accuracy = fluid.layers.accuracy(input=probs,
                                         label=labels,
                                         total=num_seqs)
        graph_vars = {
            "loss": loss,
            "probs": probs,
            "accuracy": accuracy,
            "labels": labels,
            "num_seqs": num_seqs,
            "qids": qids
        }
    elif is_regression:
        cost = fluid.layers.square_error_cost(input=logits, label=labels)
        loss = fluid.layers.mean(x=cost)
        graph_vars = {
            "loss": loss,
            "probs": logits,
            "labels": labels,
            "num_seqs": num_seqs,
            "qids": qids
        }
    else:
        raise ValueError(
            'unsupported fine tune mode. only supported classify/regression')
    return pyreader, graph_vars
def create_model(args, phase, micro_bsz, dp_sharding_rank, dp_worldsize, topo):
    """Distributed (pipeline/model-parallel) ERNIE pretraining graph.

    Builds the feed placeholders and DataLoader on the first pipeline
    stage, the encoder starting on stage 0, and the MLM (plus optional
    SOP) losses on the last stage.  Returns a graph_vars dict including
    the data_loader and recompute checkpoints.
    """
    if args.use_sop:
        from reader.pretraining_ds_ernie_full_sent import make_pretrain_dataset
    else:
        from reader.pretraining_ds_mlm import make_pretrain_dataset
    # mask_label, mask_pos for mlm, labels for sop
    if args.use_sop:
        input_fields = {
            'names': ['src_ids', 'sent_ids', 'mask_label', 'mask_pos',
                      'labels'],
            'shapes': [[-1, args.max_seq_len, 1], [-1, args.max_seq_len, 1],
                       [-1, 1], [-1, 1], [-1, 1]],
            'dtypes': ['int64', 'int64', 'int64', 'int64', 'int64'],
            'lod_levels': [0, 0, 0, 0, 0],
        }
    else:
        input_fields = {
            'names': ['src_ids', 'sent_ids', 'mask_label', 'mask_pos'],
            'shapes': [[-1, args.max_seq_len, 1], [-1, args.max_seq_len, 1],
                       [-1, 1], [-1, 1]],
            'dtypes': ['int64', 'int64', 'int64', 'int64'],
            'lod_levels': [0, 0, 0, 0],
        }
    # feed placeholders live on the first pipeline stage
    with fluid.device_guard("gpu:0"):
        inputs = [
            fluid.data(name=input_fields['names'][i],
                       shape=input_fields['shapes'][i],
                       dtype=input_fields['dtypes'][i],
                       lod_level=input_fields['lod_levels'][i])
            for i in range(len(input_fields['names']))
        ]
    if args.use_sop:
        (src_ids, sent_ids, mask_label, mask_pos, labels) = inputs
    else:
        (src_ids, sent_ids, mask_label, mask_pos) = inputs
    train_file_list = glob.glob(args.data_dir + "/*")
    # vocab file format per line: token \t id
    vocab = {}
    with open(args.vocab_file) as r:
        for line in r:
            lines = line.strip().split('\t')
            vocab[lines[0]] = int(lines[1])
    log.debug("========= worker: {} of {} ==========".format(
        dp_sharding_rank, dp_worldsize))
    data_reader = make_pretrain_dataset('pt', train_file_list, True, vocab,
                                        micro_bsz, len(vocab),
                                        args.max_seq_len, dp_sharding_rank,
                                        dp_worldsize)
    with fluid.device_guard("gpu:0"):
        data_loader = fluid.io.DataLoader.from_generator(feed_list=inputs,
                                                         capacity=70,
                                                         iterable=False)
    places = fluid.CUDAPlace(int(os.environ.get('FLAGS_selected_gpus', 0)))

    def data_gen():
        # thin generator wrapper required by set_batch_generator
        yield from data_reader

    data_loader.set_batch_generator(data_gen, places)
    ernie_config = ErnieConfig(args.ernie_config_file)._config_dict
    ernie_config["preln"] = args.preln
    weight_sharing = (topo.mp.size == 1 and topo.pp.size == 1
                      )  # pp mp should not do weight sharing
    with fluid.device_guard("gpu:0"):
        ernie = ErnieModel(src_ids,
                           sent_ids,
                           ernie_config,
                           weight_sharing=weight_sharing,
                           topo=topo)
    checkpoints = ernie._checkpoints
    checkpoints.pop(-1)
    # losses are computed on the last pipeline stage
    with fluid.device_guard(f'gpu:{args.num_pp-1}'):
        mask_lm_loss, mean_mask_lm_loss = ernie.get_lm_output(
            mask_label, mask_pos)
        total_loss = mean_mask_lm_loss
        if args.use_sop:
            sop_acc, mean_sop_loss = ernie.get_next_sentence_output(labels)
            total_loss += mean_sop_loss
        if topo.pp.size > 1:
            # keep loss vars alive when crossing pipeline stages
            mask_lm_loss.persistable = True
            mean_mask_lm_loss.persistable = True
            # checkpoints.extend([mask_lm_loss.name, mean_mask_lm_loss.name])
            if args.use_sop:
                mean_sop_loss.persistable = True
                sop_acc.persistable = True
                # checkpoints.extend([mean_sop_loss.name, sop_acc.name])
        total_loss.persistable = True
        # checkpoints.append(total_loss.name)
    if args.use_sop:
        graph_vars = {
            'data_loader': data_loader,
            'mask_lm_loss': mask_lm_loss,
            'mean_mask_lm_loss': mean_mask_lm_loss,
            'sop_loss': mean_sop_loss,
            'sop_acc': sop_acc,
            'total_loss': total_loss,
            'checkpoints': checkpoints
        }
    else:
        graph_vars = {
            'data_loader': data_loader,
            'mask_lm_loss': mask_lm_loss,
            'mean_mask_lm_loss': mean_mask_lm_loss,
            'total_loss': total_loss,
            'checkpoints': checkpoints,
        }
    return graph_vars
def create_model(args, pyreader_name, ernie_config):
    """Multi-label sequence-tagging graph (independent sigmoid per label).

    Uses a per-token, per-label binary cross-entropy masked by input_mask.
    Returns (pyreader, graph_vars); every graph var is persistable.
    """
    pyreader = fluid.layers.py_reader(
        capacity=50,  # buffer capacity, in batches
        # src_ids, sent_ids, pos_ids, task_ids, input_mask, labels,
        # seq_lens, example_index, tok_to_orig_start/end_index
        shapes=[[-1, args.max_seq_len, 1], [-1, args.max_seq_len, 1],
                [-1, args.max_seq_len, 1], [-1, args.max_seq_len, 1],
                [-1, args.max_seq_len, 1],
                [-1, args.max_seq_len, args.num_labels], [-1, 1], [-1, 1],
                [-1, args.max_seq_len, 1], [-1, args.max_seq_len, 1]],
        dtypes=[
            'int64', 'int64', 'int64', 'int64', 'float32', 'float32',
            'int64', 'int64', 'int64', 'int64'
        ],
        lod_levels=[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        name=pyreader_name,
        use_double_buffer=True)
    (src_ids, sent_ids, pos_ids, task_ids, input_mask, labels, seq_lens,
     example_index, tok_to_orig_start_index,
     tok_to_orig_end_index) = fluid.layers.read_file(pyreader)
    # embedding + transformer encoder
    ernie = ErnieModel(src_ids=src_ids,
                       position_ids=pos_ids,
                       sentence_ids=sent_ids,
                       task_ids=task_ids,
                       input_mask=input_mask,
                       config=ernie_config,
                       use_fp16=args.use_fp16)
    # top encoder layer: one vector per token
    enc_out = ernie.get_sequence_output()
    enc_out = fluid.layers.dropout(x=enc_out,
                                   dropout_prob=0.1,
                                   dropout_implementation="upscale_in_train")
    logits = fluid.layers.fc(
        input=enc_out,
        size=args.num_labels,
        num_flatten_dims=2,
        param_attr=fluid.ParamAttr(
            name="cls_seq_label_out_w",
            initializer=fluid.initializer.TruncatedNormal(scale=0.02)),
        bias_attr=fluid.ParamAttr(name="cls_seq_label_out_b",
                                  initializer=fluid.initializer.Constant(0.)))
    # independent sigmoid per label — multi-label tagging, not softmax
    logits = fluid.layers.sigmoid(logits)
    # unpadded (LoD) views for downstream postprocessing
    lod_labels = fluid.layers.sequence_unpad(labels, seq_lens)
    lod_logits = fluid.layers.sequence_unpad(logits, seq_lens)
    lod_tok_to_orig_start_index = fluid.layers.sequence_unpad(
        tok_to_orig_start_index, seq_lens)
    lod_tok_to_orig_end_index = fluid.layers.sequence_unpad(
        tok_to_orig_end_index, seq_lens)
    labels = fluid.layers.flatten(labels, axis=2)
    logits = fluid.layers.flatten(logits, axis=2)
    input_mask = fluid.layers.flatten(input_mask, axis=2)
    # calculate loss: elementwise binary cross-entropy over the sigmoids
    log_logits = fluid.layers.log(logits)
    log_logits_neg = fluid.layers.log(1 - logits)
    ce_loss = 0. - labels * log_logits - (1 - labels) * log_logits_neg
    ce_loss = fluid.layers.reduce_mean(ce_loss, dim=1, keep_dim=True)
    # ignore padding positions
    ce_loss = ce_loss * input_mask
    loss = fluid.layers.mean(x=ce_loss)
    graph_vars = {
        "inputs": src_ids,
        "loss": loss,
        "seqlen": seq_lens,
        "lod_logit": lod_logits,
        "lod_label": lod_labels,
        "example_index": example_index,
        "tok_to_orig_start_index": lod_tok_to_orig_start_index,
        "tok_to_orig_end_index": lod_tok_to_orig_end_index
    }
    for k, v in graph_vars.items():
        v.persistable = True
    return pyreader, graph_vars
def create_model(pyreader_name, ernie_config, task_group):
    """create_model

    Build the multi-task pretraining graph: masked-LM loss plus one weighted
    classification loss per task in ``task_group`` (and an optional
    contrastive loss per task). Returns ``(fetch_vars, names)`` where
    ``names`` lists the feed variable names in order.

    NOTE(review): relies on module-level globals ``bsz`` and ``args`` —
    confirm they are defined at import/call time.
    """
    ## get input
    shapes = [[bsz, args.max_seq_len, 1], [bsz, args.max_seq_len, 1],
              [bsz, args.max_seq_len, 1], [bsz, args.max_seq_len, 1],
              [bsz, args.max_seq_len, 1], [bsz, 1], [bsz, 1], [1], [bsz, 1],
              [bsz, 1], [bsz, 1]]
    names = [
        "src_ids", "pos_ids", "sent_ids", "task_ids", "input_mask",
        "mask_label", "mask_pos", "lm_weight", "batch_mask", "loss_mask",
        "gather_idx"
    ]
    dtypes = [
        "int64", "int64", "int64", "int64", "float32", "int64", "int64",
        "float32", "float32", "float32", "int64"
    ]
    cnt_general_input = len(shapes)
    # Append one (label, weight) feed pair per task.
    for index, task in enumerate(task_group):
        shapes.extend([[bsz, 1], [1]])
        names.extend(['task_label_' + str(index), 'task_weight_' + str(index)])
        dtypes.extend(["int64", "float32"])

    assert len(shapes) == len(names) == len(
        dtypes), "The three fields must have same size"
    inputs = []
    for i in range(len(shapes)):
        inputs.append(
            fluid.layers.data(name=names[i],
                              shape=shapes[i],
                              dtype=dtypes[i],
                              append_batch_size=False))

    general_data, task_params = inputs[:cnt_general_input], inputs[
        cnt_general_input:]
    src_ids, pos_ids, sent_ids, task_ids, input_mask, \
        mask_label, mask_pos, lm_weight, batch_mask, loss_mask, gather_idx = general_data

    ## build graph
    ernie = ErnieModel(src_ids=src_ids,
                       position_ids=pos_ids,
                       sentence_ids=sent_ids,
                       task_ids=task_ids,
                       input_mask=input_mask,
                       config=ernie_config,
                       weight_sharing=args.weight_sharing,
                       use_fp16=args.use_amp)
    mask_lm_loss = ernie.get_lm_output(mask_label, mask_pos)
    # Checkpoints enable gradient/activation recomputation downstream.
    checkpoints = ernie.get_checkpoints()
    total_loss = mask_lm_loss * lm_weight
    graph_vars = [mask_lm_loss, lm_weight]
    index = 0
    total_constract_loss = 0
    for task in task_group:
        # task_params is laid out as [label_0, weight_0, label_1, weight_1, ...]
        task_labels = task_params[index]
        task_weight = task_params[index + 1]
        task_loss, task_acc = ernie.get_task_output(task, task_labels,
                                                    gather_idx)
        total_loss += task_loss * task_weight * task["loss_weight"]
        # "constart" flag enables the contrastive-loss term for this task
        # (spelling kept as used by the task config).
        if task["constart"]:
            contract_loss = ernie.get_contrastive_loss(batch_mask, loss_mask)
            total_loss += contract_loss * task_weight
            total_constract_loss += contract_loss * task_weight
        graph_vars.extend([task_acc, task_weight])
        index += 2

    ## build output
    graph_vars.append(total_constract_loss)
    graph_vars.append(total_loss)
    #for var in graph_vars:
    #    var.persistable = True

    fetch_vars = {"graph_vars": graph_vars, "checkpoints": checkpoints}
    return fetch_vars, names
def create_model(ernie_config, is_training=False):
    """Build a SQuAD-style span-extraction graph over ERNIE.

    Predicts start/end position logits per token. In training mode returns
    ``(data_loader, total_loss, num_seqs)``; otherwise returns
    ``(data_loader, unique_id, start_logits, end_logits, num_seqs)``.

    NOTE(review): reads module-level ``args.use_fp16`` — confirm ``args`` is
    in scope at call time.
    """
    if is_training:
        input_fields = {
            'names': [
                'src_ids', 'pos_ids', 'sent_ids', 'input_mask',
                'start_positions', 'end_positions'
            ],
            'shapes': [[None, None], [None, None], [None, None],
                       [None, None, 1], [None, 1], [None, 1]],
            'dtypes':
            ['int64', 'int64', 'int64', 'float32', 'int64', 'int64'],
            'lod_levels': [0, 0, 0, 0, 0, 0],
        }
    else:
        input_fields = {
            'names':
            ['src_ids', 'pos_ids', 'sent_ids', 'input_mask', 'unique_id'],
            'shapes': [[None, None], [None, None], [None, None],
                       [None, None, 1], [None, 1]],
            'dtypes': ['int64', 'int64', 'int64', 'float32', 'int64'],
            'lod_levels': [0, 0, 0, 0, 0],
        }

    inputs = [
        fluid.data(name=input_fields['names'][i],
                   shape=input_fields['shapes'][i],
                   dtype=input_fields['dtypes'][i],
                   lod_level=input_fields['lod_levels'][i])
        for i in range(len(input_fields['names']))
    ]

    data_loader = fluid.io.DataLoader.from_generator(feed_list=inputs,
                                                     capacity=50,
                                                     iterable=False)

    if is_training:
        (src_ids, pos_ids, sent_ids, input_mask, start_positions,
         end_positions) = inputs
    else:
        (src_ids, pos_ids, sent_ids, input_mask, unique_id) = inputs

    ernie = ErnieModel(src_ids=src_ids,
                       position_ids=pos_ids,
                       sentence_ids=sent_ids,
                       input_mask=input_mask,
                       config=ernie_config,
                       use_fp16=args.use_fp16)

    enc_out = ernie.get_sequence_output()
    # Two scores per token: [start_logit, end_logit].
    logits = fluid.layers.fc(
        input=enc_out,
        size=2,
        num_flatten_dims=2,
        param_attr=fluid.ParamAttr(
            name="cls_squad_out_w",
            initializer=fluid.initializer.TruncatedNormal(scale=0.02)),
        bias_attr=fluid.ParamAttr(name="cls_squad_out_b",
                                  initializer=fluid.initializer.Constant(0.)))

    # Move the 2-wide score dim to the front so unstack yields the two heads.
    logits = fluid.layers.transpose(x=logits, perm=[2, 0, 1])
    start_logits, end_logits = fluid.layers.unstack(x=logits, axis=0)

    # num_seqs = batch size (sum of a per-sequence vector of ones).
    batch_ones = fluid.layers.fill_constant_batch_size_like(input=start_logits,
                                                            dtype='int64',
                                                            shape=[1],
                                                            value=1)
    num_seqs = fluid.layers.reduce_sum(input=batch_ones)

    if is_training:

        def compute_loss(logits, positions):
            # Cross-entropy of the position distribution vs. the gold index.
            loss = fluid.layers.softmax_with_cross_entropy(logits=logits,
                                                           label=positions)
            loss = fluid.layers.mean(x=loss)
            return loss

        start_loss = compute_loss(start_logits, start_positions)
        end_loss = compute_loss(end_logits, end_positions)
        total_loss = (start_loss + end_loss) / 2.0
        return data_loader, total_loss, num_seqs
    else:
        return data_loader, unique_id, start_logits, end_logits, num_seqs
def create_model(pyreader_name, ernie_config, task_group):
    """create_model

    DataLoader-based variant of the multi-task pretraining graph: masked-LM
    loss plus weighted per-task losses (and optional contrastive loss).
    Returns ``(pyreader, fetch_vars)``.

    NOTE(review): reads module-level ``args`` — confirm it is in scope.
    """
    src_ids = fluid.layers.data(name='src_ids',
                                shape=[-1, args.max_seq_len, 1],
                                dtype='int64')
    pos_ids = fluid.layers.data(name='pos_ids',
                                shape=[-1, args.max_seq_len, 1],
                                dtype='int64')
    sent_ids = fluid.layers.data(name='sent_ids',
                                 shape=[-1, args.max_seq_len, 1],
                                 dtype='int64')
    task_ids = fluid.layers.data(name='task_ids',
                                 shape=[-1, args.max_seq_len, 1],
                                 dtype='int64')
    # Full attention-mask matrix (seq_len x seq_len), not a per-token mask.
    input_mask = fluid.layers.data(name='input_mask',
                                   shape=[-1, args.max_seq_len, args.max_seq_len],
                                   dtype='float32')
    mask_label = fluid.layers.data(name='mask_label',
                                   shape=[-1, 1],
                                   dtype='int64')
    mask_pos = fluid.layers.data(name='mask_pos', shape=[-1, 1], dtype='int64')
    lm_weight = fluid.layers.data(name='lm_weight',
                                  shape=[1],
                                  dtype='float32',
                                  append_batch_size=False)
    batch_mask = fluid.layers.data(name='batch_mask',
                                   shape=[-1, 1],
                                   dtype='float32')
    loss_mask = fluid.layers.data(name="loss_mask",
                                  shape=[-1, 1],
                                  dtype='float32')
    gather_idx = fluid.layers.data(name="gather_idx",
                                   shape=[-1, 1],
                                   dtype='int64')

    # One (label, weight) feed pair per task, appended after the 11 general
    # inputs above.
    task_params_all = []
    for index, task in enumerate(task_group):
        name_label = 'task_label_' + str(index)
        name_weight = 'task_weight_' + str(index)
        task_label = fluid.layers.data(name=name_label,
                                       shape=[-1, 1],
                                       dtype='int64')
        task_weight = fluid.layers.data(name=name_weight,
                                        shape=[1],
                                        dtype='float32',
                                        append_batch_size=False)
        task_params_all.extend([task_label, task_weight])

    fluid.reader.keep_data_loader_order(False)
    feed_list = [src_ids, pos_ids, sent_ids, task_ids, input_mask, \
        mask_label, mask_pos, lm_weight, batch_mask, loss_mask, gather_idx] + task_params_all
    pyreader = fluid.io.DataLoader.from_generator(feed_list=feed_list,
                                                  capacity=70,
                                                  iterable=False)

    ernie = ErnieModel(src_ids=src_ids,
                       position_ids=pos_ids,
                       sentence_ids=sent_ids,
                       task_ids=task_ids,
                       input_mask=input_mask,
                       config=ernie_config,
                       weight_sharing=args.weight_sharing,
                       use_fp16=args.use_amp)

    mask_lm_loss = ernie.get_lm_output(mask_label, mask_pos)
    checkpoints = ernie.get_checkpoints()
    total_loss = mask_lm_loss * lm_weight
    graph_vars = [mask_lm_loss, lm_weight]
    # index = 11 skips the 11 general feeds; task pairs follow in feed_list.
    index = 11
    total_constract_loss = 0
    for task in task_group:
        task_labels = feed_list[index]
        task_weight = feed_list[index + 1]
        task_loss, task_acc = ernie.get_task_output(task, task_labels,
                                                    gather_idx)
        total_loss += task_loss * task_weight * task["loss_weight"]
        if task["constart"]:
            contract_loss = ernie.get_contrastive_loss(batch_mask, loss_mask)
            total_loss += contract_loss * task_weight
            total_constract_loss += contract_loss * task_weight
        graph_vars.extend([task_acc, task_weight])
        index += 2

    graph_vars.append(total_constract_loss)
    graph_vars.append(total_loss)
    # Keep fetch targets alive across memory-optimization passes.
    for var in graph_vars:
        var.persistable = True

    fetch_vars = {"graph_vars": graph_vars, "checkpoints": checkpoints}
    return pyreader, fetch_vars
def create_model(args,
                 pyreader_name,
                 ernie_config,
                 batch_size=16,
                 is_prediction=False,
                 task_name=""):
    """Build a dual-encoder retrieval graph with in-batch negatives.

    A query tower and a title/para tower each produce a CLS vector; their
    dot-product matrix is scored against an identity target (each query's
    positive is its own row in the batch). Returns ``(pyreader, graph_vars)``.
    """
    pyreader = fluid.layers.py_reader(
        capacity=50,
        shapes=[[batch_size, args.q_max_seq_len, 1],
                [batch_size, args.q_max_seq_len, 1],
                [batch_size, args.q_max_seq_len, 1],
                [batch_size, args.q_max_seq_len, 1],
                [batch_size, args.q_max_seq_len, 1],
                [batch_size, args.p_max_seq_len, 1],
                [batch_size, args.p_max_seq_len, 1],
                [batch_size, args.p_max_seq_len, 1],
                [batch_size, args.p_max_seq_len, 1],
                [batch_size, args.p_max_seq_len, 1], [batch_size, 1],
                [batch_size, 1]],
        dtypes=['int64', 'int64', 'int64', 'int64', 'float32', 'int64',
                'int64', 'int64', 'int64', 'float32', 'int64', 'int64'],
        lod_levels=[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        name=pyreader_name,
        use_double_buffer=True)

    (src_ids_q, sent_ids_q, pos_ids_q, task_ids_q, input_mask_q, src_ids_p,
     sent_ids_p, pos_ids_p, task_ids_p, input_mask_p, labels,
     qids) = fluid.layers.read_file(pyreader)

    # Query tower.
    ernie_q = ErnieModel(src_ids=src_ids_q,
                         position_ids=pos_ids_q,
                         sentence_ids=sent_ids_q,
                         task_ids=task_ids_q,
                         input_mask=input_mask_q,
                         config=ernie_config,
                         model_name='query_')
    ## pos para
    ernie_p = ErnieModel(src_ids=src_ids_p,
                         position_ids=pos_ids_p,
                         sentence_ids=sent_ids_p,
                         task_ids=task_ids_p,
                         input_mask=input_mask_p,
                         config=ernie_config,
                         model_name='titlepara_')

    q_cls_feats = ernie_q.get_cls_output()
    p_cls_feats = ernie_p.get_cls_output()
    #p_cls_feats = fluid.layers.concat([pos_cls_feats, neg_cls_feats], axis=0)
    #src_ids_p = fluid.layers.Print(src_ids_p, message='p: ')
    #p_cls_feats = fluid.layers.Print(p_cls_feats, message='p: ')

    #multiply: [batch, batch] similarity matrix of all query/para pairs.
    logits = fluid.layers.matmul(q_cls_feats,
                                 p_cls_feats,
                                 transpose_x=False,
                                 transpose_y=True)
    probs = logits
    #fluid.layers.Print(probs, message='probs: ')
    #logits2 = fluid.layers.elementwise_mul(x=q_rep, y=p_rep)
    #fluid.layers.Print(logits2, message='logits2: ')
    #probs2 = fluid.layers.reduce_sum(logits, dim=-1)
    #fluid.layers.Print(probs2, message='probs2: ')

    # In-batch-negative target: row i's positive is column i.
    matrix_labels = fluid.layers.eye(batch_size, batch_size, dtype='float32')
    matrix_labels.stop_gradient = True

    #print('DEBUG:\tstart loss')
    ce_loss, _ = fluid.layers.softmax_with_cross_entropy(
        logits=logits,
        label=matrix_labels,
        soft_label=True,
        return_softmax=True)
    loss = fluid.layers.mean(x=ce_loss)
    #print('DEBUG:\tloss done')

    # Convert the one-hot target back to index form for the accuracy op.
    matrix_labels = fluid.layers.argmax(matrix_labels, axis=-1)
    matrix_labels = fluid.layers.reshape(x=matrix_labels,
                                         shape=[batch_size, 1])
    num_seqs = fluid.layers.create_tensor(dtype='int64')
    accuracy = fluid.layers.accuracy(input=probs,
                                     label=matrix_labels,
                                     total=num_seqs)

    #ce_loss, probs = fluid.layers.softmax_with_cross_entropy(
    #    logits=logits, label=labels, return_softmax=True)
    #loss = fluid.layers.mean(x=ce_loss)
    #accuracy = fluid.layers.accuracy(
    #    input=probs, label=labels, total=num_seqs)

    graph_vars = {
        "loss": loss,
        "probs": probs,
        "accuracy": accuracy,
        "labels": labels,
        "num_seqs": num_seqs,
        "qids": qids,
        "q_rep": q_cls_feats,
        "p_rep": p_cls_feats
    }

    return pyreader, graph_vars
def create_model(self, decoding=False):
    """Build the seq2seq training graph (or delegate to decoding).

    Feeds a context and a query segment (each with its own attention mask),
    encodes both through ERNIE, and computes cross-entropy over the target
    tokens gathered at ``tgt_pos`` — with optional label smoothing.
    Returns ``(pyreader, graph_vars)``.
    """
    if decoding:
        # Inference path builds a separate beam-search graph.
        return self.infilling_decode()
    if self.task_type == "dialog":
        emb_num = 4  # word/pos/role/turn embeddings
    else:
        emb_num = 3  # word/pos/sent embeddings
    input_shapes = [[-1, self.max_seq_len, 1]] * emb_num + \
                   [[-1, self.max_seq_len, self.max_seq_len]]
    query_input_shapes = [[-1, self.max_seq_len, 1]] * emb_num + \
                         [[-1, self.max_seq_len, self.max_seq_len * 2]]
    input_dtypes = ['int64'] * emb_num + ['float32']
    input_lod_levels = [0] * emb_num + [0]

    # Layout: [context embs..., context mask, query embs..., query mask,
    #          tgt_labels, tgt_pos]
    shapes = input_shapes + query_input_shapes + [[-1, 1], [-1, 1]]
    dtypes = input_dtypes * 2 + ['int64', 'int64']
    lod_levels = input_lod_levels * 2 + [0, 0]

    inputs = self.to_ternsor(shapes, dtypes, lod_levels)
    pyreader = fluid.io.DataLoader.from_generator(feed_list=inputs,
                                                  capacity=50,
                                                  iterable=False)

    emb_ids = [{}, {}]
    for key, value in zip(self.emb_keys, inputs[:emb_num]):
        emb_ids[0][key] = value
    for key, value in zip(self.emb_keys,
                          inputs[emb_num + 1:emb_num * 2 + 1]):
        emb_ids[1][key] = value
    input_mask, input_query_mask = inputs[emb_num], inputs[2 * emb_num + 1]
    tgt_labels, tgt_pos = inputs[-2:]

    ernie = ErnieModel(emb_ids=emb_ids,
                       input_mask=[input_mask, input_query_mask],
                       config=self.ernie_config,
                       use_fp16=self.use_fp16,
                       task_type=self.task_type)

    enc_out = ernie.get_sequence_output()
    # Project the encoder states at the target positions to vocab logits.
    fc_out = self.cal_logit(enc_out, tgt_pos)

    if self.label_smooth:
        out_size = self.ernie_config[
            "tgt_vocab_size"] or self.ernie_config['vocab_size']
        labels = fluid.layers.label_smooth(label=fluid.layers.one_hot(
            input=tgt_labels, depth=out_size),
                                           epsilon=self.label_smooth)
        ce_loss = layers.softmax_with_cross_entropy(logits=fc_out,
                                                    label=labels,
                                                    soft_label=True)
        #probs = fluid.layers.log(fluid.layers.softmax(fc_out))
        #ce_loss = fluid.layers.kldiv_loss(probs, labels, reduction='batchmean')
    else:
        ce_loss, probs = fluid.layers.softmax_with_cross_entropy(
            logits=fc_out, label=tgt_labels, return_softmax=True)

    loss = fluid.layers.mean(x=ce_loss)
    graph_vars = {"loss": loss}
    for k, v in graph_vars.items():
        v.persistable = True

    return pyreader, graph_vars
def create_model(args,
                 pyreader_name,
                 ernie_config,
                 is_prediction=False,
                 task_name="",
                 is_classify=False,
                 is_regression=False,
                 ernie_version="1.0"):
    """Build a single-sentence classification/regression fine-tuning graph.

    Exactly one of ``is_classify`` / ``is_regression`` must be True. In
    prediction mode returns ``(pyreader, probs, feed_targets_name)``;
    otherwise returns ``(pyreader, graph_vars)`` with loss/probs/accuracy.
    """
    # Validate the mode up front. Previously this assert ran only after
    # `labels` had been referenced, so calling with both flags False raised a
    # confusing NameError instead of this message.
    assert is_classify != is_regression, 'is_classify or is_regression must be true and only one of them can be true'

    src_ids = fluid.layers.data(name='eval_placeholder_0',
                                shape=[-1, args.max_seq_len, 1],
                                dtype='int64')
    sent_ids = fluid.layers.data(name='eval_placeholder_1',
                                 shape=[-1, args.max_seq_len, 1],
                                 dtype='int64')
    pos_ids = fluid.layers.data(name='eval_placeholder_2',
                                shape=[-1, args.max_seq_len, 1],
                                dtype='int64')
    input_mask = fluid.layers.data(name='eval_placeholder_3',
                                   shape=[-1, args.max_seq_len, 1],
                                   dtype='float32')
    task_ids = fluid.layers.data(name='eval_placeholder_4',
                                 shape=[-1, args.max_seq_len, 1],
                                 dtype='int64')
    qids = fluid.layers.data(name='eval_placeholder_5',
                             shape=[-1, 1],
                             dtype='int64')

    # Label dtype depends on the task: class index vs. regression target.
    if is_classify:
        labels = fluid.layers.data(name='6', shape=[-1, 1], dtype='int64')
    elif is_regression:
        labels = fluid.layers.data(name='6', shape=[-1, 1], dtype='float32')

    pyreader = fluid.io.DataLoader.from_generator(feed_list=[
        src_ids, sent_ids, pos_ids, task_ids, input_mask, labels, qids
    ],
                                                  capacity=70,
                                                  iterable=False)

    ernie = ErnieModel(src_ids=src_ids,
                       position_ids=pos_ids,
                       sentence_ids=sent_ids,
                       task_ids=task_ids,
                       input_mask=input_mask,
                       config=ernie_config,
                       use_fp16=args.use_fp16)

    cls_feats = ernie.get_pooled_output()
    cls_feats = fluid.layers.dropout(x=cls_feats,
                                     dropout_prob=0.1,
                                     dropout_implementation="upscale_in_train")
    logits = fluid.layers.fc(
        input=cls_feats,
        size=args.num_labels,
        param_attr=fluid.ParamAttr(
            name=task_name + "_cls_out_w",
            initializer=fluid.initializer.TruncatedNormal(scale=0.02)),
        bias_attr=fluid.ParamAttr(name=task_name + "_cls_out_b",
                                  initializer=fluid.initializer.Constant(0.)))

    if is_prediction:
        if is_classify:
            probs = fluid.layers.softmax(logits)
        else:
            probs = logits
        feed_targets_name = [
            src_ids.name, sent_ids.name, pos_ids.name, input_mask.name
        ]
        # ERNIE 2.0 additionally feeds task ids.
        if ernie_version == "2.0":
            feed_targets_name += [task_ids.name]
        return pyreader, probs, feed_targets_name

    num_seqs = fluid.layers.create_tensor(dtype='int64')
    if is_classify:
        ce_loss, probs = fluid.layers.softmax_with_cross_entropy(
            logits=logits, label=labels, return_softmax=True)
        loss = fluid.layers.mean(x=ce_loss)
        accuracy = fluid.layers.accuracy(input=probs,
                                         label=labels,
                                         total=num_seqs)
        graph_vars = {
            "loss": loss,
            "probs": probs,
            "accuracy": accuracy,
            "labels": labels,
            "num_seqs": num_seqs,
            "qids": qids,
            "logits": logits  # add for middle state
        }
    elif is_regression:
        cost = fluid.layers.square_error_cost(input=logits, label=labels)
        loss = fluid.layers.mean(x=cost)
        graph_vars = {
            "loss": loss,
            "probs": logits,
            "labels": labels,
            "num_seqs": num_seqs,
            "qids": qids
        }
    else:
        raise ValueError(
            'unsupported fine tune mode. only supported classify/regression')

    return pyreader, graph_vars
def infilling_decode(self):
    """Build the beam-search decoding graph for infilling generation.

    Runs an incremental decoding loop inside a fluid ``While`` op: at every
    step it feeds the previously selected token plus an attention-query token
    (``self.attn_id``), scores the vocabulary, applies a GNMT-style length
    penalty, and advances the beams. Returns ``(pyreader, graph_vars)`` with
    the finished beam ids/scores.
    """
    if self.task_type == "dialog":
        emb_num = 4  # word/pos/role/turn embeddings
    else:
        emb_num = 3  # word/pos/sent embeddings
    input_shapes = [[-1, self.max_seq_len, 1]] * emb_num + \
                   [[-1, self.max_seq_len, self.max_seq_len]]
    input_dtypes = ['int64'] * emb_num + ['float32']
    input_lod_levels = [0] * emb_num + [0]

    # Decoding-specific feeds: initial target ids/pos, beam init scores,
    # beam parent indices, target-side attention mask, and example ids.
    shapes = input_shapes + [[-1, self.max_seq_len, 1],
                             [-1, self.max_seq_len, 1], [-1, 1], [-1],
                             [-1, 1, self.max_seq_len], [-1, 1]]
    dtypes = input_dtypes + [
        'int64', 'int64', 'float32', 'int32', 'float32', 'int64'
    ]
    lod_levels = input_lod_levels + [2, 2, 2, 0, 0, 0]

    inputs = self.to_ternsor(shapes, dtypes, lod_levels)
    pyreader = fluid.io.DataLoader.from_generator(feed_list=inputs,
                                                  capacity=50,
                                                  iterable=False)

    emb_ids = {}
    for key, value in zip(self.emb_keys, inputs[:emb_num]):
        emb_ids[key] = value

    input_mask = inputs[emb_num]
    tgt_ids, tgt_pos, init_scores, parent_idx, tgt_input_mask, data_ids = inputs[
        -6:]

    # decoding=True enables the incremental-state (cache) path in the model;
    # parent_idx reorders that cache after each beam-search step.
    ernie = ErnieModel(emb_ids=emb_ids,
                       input_mask=input_mask,
                       config=self.ernie_config,
                       use_fp16=self.use_fp16,
                       task_type=self.task_type,
                       decoding=True,
                       gather_idx=parent_idx)

    max_len = layers.fill_constant(shape=[1],
                                   dtype=tgt_ids.dtype,
                                   value=self.max_dec_len,
                                   force_cpu=True)
    # step_idx indexes the tensor arrays; pos_idx is the (1-based) position
    # used for position embeddings of the current token.
    step_idx = layers.fill_constant(shape=[1],
                                    dtype=tgt_ids.dtype,
                                    value=0,
                                    force_cpu=True)
    pos_idx = layers.fill_constant(shape=[1],
                                   dtype=tgt_ids.dtype,
                                   value=1,
                                   force_cpu=True)
    cond = layers.less_than(x=step_idx, y=max_len)
    while_op = layers.While(cond)

    # Per-step state stored in LoDTensorArrays, indexed by step_idx.
    ids = layers.array_write(layers.reshape(tgt_ids, (-1, 1)), step_idx)
    pos_biases = layers.array_write(layers.reshape(tgt_pos, (-1, 1)),
                                    step_idx)
    scores = layers.array_write(init_scores, step_idx)
    tgt_masks = layers.array_write(tgt_input_mask, step_idx)

    with while_op.block():
        pre_ids = layers.array_read(array=ids, i=step_idx)
        pre_ids = layers.reshape(pre_ids, (-1, 1, 1), inplace=True)
        pre_scores = layers.array_read(array=scores, i=step_idx)
        pos_bias = layers.array_read(array=pos_biases, i=step_idx)
        # Reorder per-beam state to follow the surviving beams.
        pos_bias = layers.gather(input=pos_bias, index=parent_idx)
        tmp_mask = layers.array_read(tgt_masks, i=step_idx)

        def gen_batch_like(value,
                           dtype="int64",
                           shape=[-1, 1, 1],
                           is_scalar=True):
            # Broadcast a scalar (or a 1-D tensor when is_scalar=False) to the
            # current batch size, which is derived from parent_idx.
            if is_scalar:
                return layers.fill_constant_batch_size_like(input=parent_idx,
                                                            value=value,
                                                            shape=shape,
                                                            dtype=dtype)
            else:
                return layers.elementwise_mul(
                    x=layers.fill_constant_batch_size_like(input=parent_idx,
                                                           value=1,
                                                           shape=shape,
                                                           dtype=dtype),
                    y=value,
                    axis=0)

        tmp_mask = layers.gather(input=tmp_mask, index=parent_idx)
        append_0_mask = gen_batch_like(0.0, dtype=tmp_mask.dtype)
        append_1_mask = gen_batch_like(1.0, dtype=tmp_mask.dtype)
        # Extend the mask by one position: the previous token may always be
        # attended (1), while pre_mask hides the query slot (0) from itself.
        tmp_mask = layers.concat([tmp_mask, append_1_mask], axis=2)
        pre_mask = layers.concat([tmp_mask, append_0_mask], axis=2)
        cur_mask = layers.concat([tmp_mask, append_1_mask], axis=2)

        cur_ids = gen_batch_like(self.attn_id)
        pre_pos = gen_batch_like(step_idx, is_scalar=False)
        cur_pos = gen_batch_like(pos_idx, is_scalar=False)
        if self.continuous_position:
            # Offset positions by each example's starting position.
            pre_pos = pre_pos + pos_bias
            cur_pos = cur_pos + pos_bias

        # Two-token step input: [previous token, attention-query token].
        dec_emb_ids = {
            "word_embedding": layers.concat([pre_ids, cur_ids], axis=1),
            "pos_embedding": layers.concat([pre_pos, cur_pos], axis=1)
        }
        if self.task_type == "dialog":
            role_ids = gen_batch_like(0)
            turn_ids = gen_batch_like(0)
            dec_emb_ids["role_embedding"] = layers.concat(
                [role_ids, role_ids], axis=1)
            dec_emb_ids["turn_embedding"] = layers.concat(
                [turn_ids, turn_ids], axis=1)
        else:
            sent_ids = gen_batch_like(self.tgt_type_id)
            dec_emb_ids["sent_embedding"] = layers.concat(
                [sent_ids, sent_ids], axis=1)

        dec_mask = layers.concat([pre_mask, cur_mask], axis=1)
        dec_out = ernie.encode(dec_emb_ids,
                               dec_mask,
                               parent_idx,
                               remove_query=True)
        # Score only the query slot's output (position 1 of the 2-token step).
        fc_out = self.cal_logit(dec_out[:, 1:, :], None)
        topk_scores, topk_indices = layers.topk(
            input=layers.softmax(fc_out), k=self.beam_size)

        # GNMT-style length penalty: ((5 + len) / 6) ** alpha.
        pre_lenpen = layers.pow(
            (5.0 + layers.cast(step_idx, pre_scores.dtype)) / 6.0,
            self.length_penalty)
        cur_lenpen = layers.pow(
            (5.0 + layers.cast(pos_idx, pre_scores.dtype)) / 6.0,
            self.length_penalty)
        accu_scores = layers.elementwise_add(
            x=layers.log(topk_scores), y=pre_scores * pre_lenpen,
            axis=0) / cur_lenpen

        topk_indices = layers.lod_reset(topk_indices, pre_ids)
        accu_scores = layers.lod_reset(accu_scores, pre_ids)
        selected_ids, selected_scores, gather_idx = layers.beam_search(
            pre_ids=pre_ids,
            pre_scores=pre_scores,
            ids=topk_indices,
            scores=accu_scores,
            beam_size=self.beam_size,
            end_id=self.eos_idx,
            return_parent_idx=True)

        layers.increment(x=step_idx, value=1.0, in_place=True)
        layers.increment(x=pos_idx, value=1.0, in_place=True)
        layers.array_write(selected_ids, i=step_idx, array=ids)
        layers.array_write(selected_scores, i=step_idx, array=scores)
        layers.array_write(tmp_mask, i=step_idx, array=tgt_masks)
        layers.array_write(pos_bias, i=step_idx, array=pos_biases)
        # Propagate beam-parent indices into the model's cache gather index.
        layers.assign(gather_idx, parent_idx)

        # Continue while under max length and at least one beam is alive.
        length_cond = layers.less_than(x=step_idx, y=max_len)
        finish_cond = layers.logical_not(layers.is_empty(x=selected_ids))
        layers.logical_and(x=length_cond, y=finish_cond, out=cond)

    finished_ids, finished_scores = layers.beam_search_decode(
        ids, scores, beam_size=self.beam_size, end_id=self.eos_idx)

    graph_vars = {
        "finished_ids": finished_ids,
        "finished_scores": finished_scores,
        "data_ids": data_ids
    }

    for k, v in graph_vars.items():
        v.persistable = True

    return pyreader, graph_vars
def create_model(args, pyreader_name, ernie_config, is_prediction=False):
    """Build a single-label sequence-labeling (NER-style) graph over ERNIE.

    Per-token softmax over ``args.num_labels`` tags, masked token-level
    cross-entropy loss, and chunk-level precision/recall counters via
    ``chunk_eval``. Returns ``(pyreader, graph_vars)``.
    """
    # Feed variables use bare numeric names ('1'..'8'); note '6' is unused.
    src_ids = fluid.layers.data(name='1',
                                shape=[-1, args.max_seq_len, 1],
                                dtype='int64')
    sent_ids = fluid.layers.data(name='2',
                                 shape=[-1, args.max_seq_len, 1],
                                 dtype='int64')
    pos_ids = fluid.layers.data(name='3',
                                shape=[-1, args.max_seq_len, 1],
                                dtype='int64')
    task_ids = fluid.layers.data(name='4',
                                 shape=[-1, args.max_seq_len, 1],
                                 dtype='int64')
    input_mask = fluid.layers.data(name='5',
                                   shape=[-1, args.max_seq_len, 1],
                                   dtype='float32')
    labels = fluid.layers.data(name='7',
                               shape=[-1, args.max_seq_len, 1],
                               dtype='int64')
    seq_lens = fluid.layers.data(name='8', shape=[-1], dtype='int64')

    pyreader = fluid.io.DataLoader.from_generator(feed_list=[
        src_ids, sent_ids, pos_ids, task_ids, input_mask, labels, seq_lens
    ],
                                                  capacity=70,
                                                  iterable=False)

    ernie = ErnieModel(src_ids=src_ids,
                       position_ids=pos_ids,
                       sentence_ids=sent_ids,
                       task_ids=task_ids,
                       input_mask=input_mask,
                       config=ernie_config,
                       use_fp16=args.use_fp16)

    enc_out = ernie.get_sequence_output()
    enc_out = fluid.layers.dropout(x=enc_out,
                                   dropout_prob=0.1,
                                   dropout_implementation="upscale_in_train")
    logits = fluid.layers.fc(
        input=enc_out,
        size=args.num_labels,
        num_flatten_dims=2,
        param_attr=fluid.ParamAttr(
            name="cls_seq_label_out_w",
            initializer=fluid.initializer.TruncatedNormal(scale=0.02)),
        bias_attr=fluid.ParamAttr(name="cls_seq_label_out_b",
                                  initializer=fluid.initializer.Constant(0.)))
    infers = fluid.layers.argmax(logits, axis=2)
    # NOTE(review): ret_infers is computed but not returned/used below.
    ret_infers = fluid.layers.reshape(x=infers, shape=[-1, 1])

    # Strip padding before chunk-level evaluation.
    lod_labels = fluid.layers.sequence_unpad(labels, seq_lens)
    lod_infers = fluid.layers.sequence_unpad(infers, seq_lens)

    # chunk counters for entity-level precision/recall/F1.
    (_, _, _, num_infer, num_label, num_correct) = fluid.layers.chunk_eval(
        input=lod_infers,
        label=lod_labels,
        chunk_scheme=args.chunk_scheme,
        num_chunk_types=((args.num_labels - 1) //
                         (len(args.chunk_scheme) - 1)))

    labels = fluid.layers.flatten(labels, axis=2)
    ce_loss, probs = fluid.layers.softmax_with_cross_entropy(
        logits=fluid.layers.flatten(logits, axis=2),
        label=labels,
        return_softmax=True)
    input_mask = fluid.layers.flatten(input_mask, axis=2)
    # Zero-out loss on padding positions.
    ce_loss = ce_loss * input_mask
    loss = fluid.layers.mean(x=ce_loss)

    graph_vars = {
        "inputs": src_ids,
        "loss": loss,
        "probs": probs,
        "seqlen": seq_lens,
        "num_infer": num_infer,
        "num_label": num_label,
        "num_correct": num_correct,
    }

    # Keep fetch targets alive across memory-optimization passes.
    for k, v in graph_vars.items():
        v.persistable = True

    return pyreader, graph_vars
def create_model(args,
                 pyreader_name,
                 ernie_config,
                 batch_size=16,
                 is_prediction=False,
                 task_name="",
                 fleet_handle=None):
    """Build a dual-encoder graph with explicit positive/negative paragraphs.

    One query tower and a shared title/para tower encode the query, its
    positive paragraph and a hard-negative paragraph. Positives and negatives
    are concatenated into a 2*batch candidate pool; with
    ``args.use_cross_batch`` and a ``fleet_handle`` the pool is all-gathered
    across workers. Softmax cross-entropy against each query's own positive
    index is the loss. Returns ``(pyreader, graph_vars)`` in prediction mode
    (sliced to positives only) or ``(pyreader, graph_vars, cp)`` with
    recompute checkpoints when training.
    """
    print ("DEBUG:\tclassify")
    pyreader = fluid.layers.py_reader(
        capacity=50,
        shapes=[[batch_size, args.q_max_seq_len, 1],
                [batch_size, args.q_max_seq_len, 1],
                [batch_size, args.q_max_seq_len, 1],
                [batch_size, args.q_max_seq_len, 1],
                [batch_size, args.q_max_seq_len, 1],
                [batch_size, args.p_max_seq_len, 1],
                [batch_size, args.p_max_seq_len, 1],
                [batch_size, args.p_max_seq_len, 1],
                [batch_size, args.p_max_seq_len, 1],
                [batch_size, args.p_max_seq_len, 1],
                [batch_size, args.p_max_seq_len, 1],
                [batch_size, args.p_max_seq_len, 1],
                [batch_size, args.p_max_seq_len, 1],
                [batch_size, args.p_max_seq_len, 1],
                [batch_size, args.p_max_seq_len, 1], [batch_size, 1],
                [batch_size, 1]],
        dtypes=['int64', 'int64', 'int64', 'int64', 'float32', 'int64',
                'int64', 'int64', 'int64', 'float32', 'int64', 'int64',
                'int64', 'int64', 'float32', 'int64', 'int64'],
        lod_levels=[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        name=task_name + "_" + pyreader_name,
        use_double_buffer=True)

    (src_ids_q, sent_ids_q, pos_ids_q, task_ids_q, input_mask_q,
     src_ids_p_pos, sent_ids_p_pos, pos_ids_p_pos, task_ids_p_pos,
     input_mask_p_pos, src_ids_p_neg, sent_ids_p_neg, pos_ids_p_neg,
     task_ids_p_neg, input_mask_p_neg, labels,
     qids) = fluid.layers.read_file(pyreader)

    # Query tower.
    ernie_q = ErnieModel(src_ids=src_ids_q,
                         position_ids=pos_ids_q,
                         sentence_ids=sent_ids_q,
                         task_ids=task_ids_q,
                         input_mask=input_mask_q,
                         config=ernie_config,
                         model_name='query_')
    ## pos para
    ernie_pos = ErnieModel(src_ids=src_ids_p_pos,
                           position_ids=pos_ids_p_pos,
                           sentence_ids=sent_ids_p_pos,
                           task_ids=task_ids_p_pos,
                           input_mask=input_mask_p_pos,
                           config=ernie_config,
                           model_name='titlepara_')
    ## neg para (shares the 'titlepara_' tower parameters)
    ernie_neg = ErnieModel(src_ids=src_ids_p_neg,
                           position_ids=pos_ids_p_neg,
                           sentence_ids=sent_ids_p_neg,
                           task_ids=task_ids_p_neg,
                           input_mask=input_mask_p_neg,
                           config=ernie_config,
                           model_name='titlepara_')

    q_cls_feats = ernie_q.get_cls_output()
    pos_cls_feats = ernie_pos.get_cls_output()
    neg_cls_feats = ernie_neg.get_cls_output()
    #src_ids_p_pos = fluid.layers.Print(src_ids_p_pos, message='pos: ')
    #pos_cls_feats = fluid.layers.Print(pos_cls_feats, message='pos: ')
    # Candidate pool: rows [0, batch) are positives, [batch, 2*batch) negatives.
    p_cls_feats = fluid.layers.concat([pos_cls_feats, neg_cls_feats], axis=0)

    if is_prediction:
        # Prediction scores only each query against its own positive para.
        p_cls_feats = fluid.layers.slice(p_cls_feats,
                                         axes=[0],
                                         starts=[0],
                                         ends=[batch_size])
        multi = fluid.layers.elementwise_mul(q_cls_feats, p_cls_feats)
        probs = fluid.layers.reduce_sum(multi, dim=-1)
        graph_vars = {
            "probs": probs,
            "qids": qids,
            "q_rep": q_cls_feats,
            "p_rep": p_cls_feats
        }
        return pyreader, graph_vars

    if args.use_cross_batch and fleet_handle is not None:
        # Gather all workers' candidate pools for cross-batch negatives.
        print("worker num is: {}".format(fleet_handle.worker_num()))
        all_p_cls_feats = fluid.layers.collective._c_allgather(
            p_cls_feats, fleet_handle.worker_num(), use_calc_stream=True)

        #multiply
        logits = fluid.layers.matmul(q_cls_feats,
                                     all_p_cls_feats,
                                     transpose_x=False,
                                     transpose_y=True)
        worker_id = fleet_handle.worker_index()
    else:
        logits = fluid.layers.matmul(q_cls_feats,
                                     p_cls_feats,
                                     transpose_x=False,
                                     transpose_y=True)
        worker_id = 0
    probs = logits

    # Each query's positive sits at offset worker_id * 2 * batch_size in the
    # gathered pool (each worker contributes batch positives then batch negs).
    all_labels = np.array(range(batch_size * worker_id * 2,
                                batch_size * (worker_id * 2 + 1)),
                          dtype='int64')
    matrix_labels = fluid.layers.assign(all_labels)
    matrix_labels = fluid.layers.unsqueeze(matrix_labels, axes=1)
    matrix_labels.stop_gradient = True
    # fluid.layers.Print(matrix_labels, message='matrix_labels')

    #print('DEBUG:\tstart loss')
    ce_loss = fluid.layers.softmax_with_cross_entropy(logits=logits,
                                                      label=matrix_labels)
    loss = fluid.layers.mean(x=ce_loss)
    #print('DEBUG:\tloss done')

    num_seqs = fluid.layers.create_tensor(dtype='int64')
    # FIX: pass total=num_seqs so the accuracy op populates num_seqs; it was
    # previously created but never written, yet returned in graph_vars
    # (the sibling in-batch-negative builder already passes it).
    accuracy = fluid.layers.accuracy(input=probs,
                                     label=matrix_labels,
                                     total=num_seqs)

    graph_vars = {
        "loss": loss,
        "probs": probs,
        "accuracy": accuracy,
        "labels": labels,
        "num_seqs": num_seqs,
        "qids": qids,
        "q_rep": q_cls_feats,
        "p_rep": p_cls_feats
    }

    # Recompute checkpoints from all three towers.
    cp = []
    cp.extend(ernie_q.checkpoints)
    cp.extend(ernie_pos.checkpoints)
    cp.extend(ernie_neg.checkpoints)
    return pyreader, graph_vars, cp