def create_model(args, ernie_config):
    """Build the ERNIE graph and expose its sequence and pooled outputs.

    Five feed variables of shape ``[-1, args.max_seq_len, 1]`` are declared
    (four ``int64`` id tensors and one ``float32`` mask), passed to
    ``ErnieModel``, and the two model outputs are routed through identity
    ``scale`` ops so that they carry stable names.

    Args:
        args: object providing ``max_seq_len`` and ``use_fp16``.
        ernie_config: configuration forwarded to ``ErnieModel``.

    Returns:
        ``(inputs, outputs)``: the list of the five feed variables in the
        order src_ids, sent_ids, pos_ids, task_ids, input_mask, and the list
        ``[sequence_output, pooled_output]``.
    """
    feed_specs = (
        ("src_ids", 'int64'),
        ("sent_ids", 'int64'),
        ("pos_ids", 'int64'),
        ("task_ids", 'int64'),
        ("input_mask", 'float32'),
    )
    inputs = [
        fluid.data(feed_name, [-1, args.max_seq_len, 1], dtype=feed_dtype)
        for feed_name, feed_dtype in feed_specs
    ]
    src_ids, sent_ids, pos_ids, task_ids, input_mask = inputs

    ernie = ErnieModel(
        src_ids=src_ids,
        position_ids=pos_ids,
        sentence_ids=sent_ids,
        task_ids=task_ids,
        input_mask=input_mask,
        config=ernie_config,
        use_fp16=args.use_fp16)

    seq_out = ernie.get_sequence_output()
    cls_feats = ernie.get_pooled_output()

    # Identity ops whose only purpose is to give the outputs readable names;
    # otherwise the saved inference model exposes opaque names such as
    # 'save_infer_model/scale_1'.
    seq_out = fluid.layers.scale(
        seq_out, scale=1.0, name='ernie_sequence_latent')
    cls_feats = fluid.layers.scale(
        cls_feats, scale=1.0, name='ernie_classification')

    # Log the resolved names/shapes/dtypes so callers know what to feed/fetch.
    for idx, feed_var in enumerate(inputs):
        print(f'input[{idx}]:', feed_var.name, feed_var.shape, feed_var.dtype)
    print('sequence_output :', seq_out.name, seq_out.shape, seq_out.dtype)
    print('classifier_output:', cls_feats.name, cls_feats.shape,
          cls_feats.dtype)

    outputs = [seq_out, cls_feats]
    return inputs, outputs
def create_model(args, pyreader_name, ernie_config):
    """Build an ERNIE graph that emits pooled and per-token embeddings.

    Args:
        args: object providing ``max_seq_len``.
        pyreader_name: name given to the ``py_reader``.
        ernie_config: configuration forwarded to ``ErnieModel``.

    Returns:
        ``(pyreader, graph_vars)`` where ``graph_vars`` maps
        ``"cls_embeddings"`` to the pooled output and
        ``"top_layer_embeddings"`` to the sequence output after
        ``sequence_unpad`` with the fed sequence lengths.
    """
    seq_len = args.max_seq_len
    # Four per-token slots followed by one per-example length slot.
    slot_shapes = [[-1, seq_len, 1] for _ in range(4)] + [[-1, 1]]
    slot_dtypes = ['int64', 'int64', 'int64', 'float', 'int64']

    pyreader = fluid.layers.py_reader(
        capacity=50,
        shapes=slot_shapes,
        dtypes=slot_dtypes,
        lod_levels=[0, 0, 0, 0, 0],
        name=pyreader_name,
        use_double_buffer=True)

    fed = fluid.layers.read_file(pyreader)
    src_ids, sent_ids, pos_ids, input_mask, seq_lens = fed

    ernie = ErnieModel(
        src_ids=src_ids,
        position_ids=pos_ids,
        sentence_ids=sent_ids,
        input_mask=input_mask,
        config=ernie_config)

    enc_out = ernie.get_sequence_output()
    unpad_enc_out = fluid.layers.sequence_unpad(enc_out, length=seq_lens)
    cls_feats = ernie.get_pooled_output()

    # Mark as persistable so memory optimization does not reuse these buffers.
    for kept in (enc_out, unpad_enc_out, cls_feats):
        kept.persistable = True

    graph_vars = {
        "cls_embeddings": cls_feats,
        "top_layer_embeddings": unpad_enc_out,
    }
    return pyreader, graph_vars
def create_model(args, pyreader_name, ernie_config, is_prediction=False):
    """Build a token-classification graph fed with a full self-attention mask.

    Args:
        args: object providing ``max_seq_len``, ``num_labels``, ``use_fp16``
            and ``loss_scaling``.
        pyreader_name: name given to the ``py_reader``.
        ernie_config: configuration forwarded to ``ErnieModel``.
        is_prediction: accepted for interface compatibility; not read here.

    Returns:
        ``(pyreader, graph_vars)`` with keys ``"loss"``, ``"probs"``,
        ``"labels"``, ``"infers"`` and ``"seq_lens"``; every entry is marked
        persistable.
    """
    seq_len = args.max_seq_len
    pyreader = fluid.layers.py_reader(
        capacity=50,
        shapes=[
            [-1, seq_len, 1],        # src_ids
            [-1, seq_len, 1],        # sent_ids
            [-1, seq_len, 1],        # pos_ids
            [-1, seq_len, seq_len],  # self_attn_mask
            [-1, seq_len, 1],        # labels
            [-1, 1],                 # seq_lens
        ],
        dtypes=['int64', 'int64', 'int64', 'float', 'int64', 'int64'],
        lod_levels=[0, 0, 0, 0, 0, 0],
        name=pyreader_name,
        use_double_buffer=True)

    fed = fluid.layers.read_file(pyreader)
    src_ids, sent_ids, pos_ids, self_attn_mask, labels, seq_lens = fed

    ernie = ErnieModel(
        src_ids=src_ids,
        position_ids=pos_ids,
        sentence_ids=sent_ids,
        self_attn_mask=self_attn_mask,
        config=ernie_config,
        use_fp16=args.use_fp16)

    enc_out = ernie.get_sequence_output()

    weight_attr = fluid.ParamAttr(
        name="cls_seq_label_out_w",
        initializer=fluid.initializer.TruncatedNormal(scale=0.02))
    bias_attr = fluid.ParamAttr(
        name="cls_seq_label_out_b",
        initializer=fluid.initializer.Constant(0.))
    logits = fluid.layers.fc(
        input=enc_out,
        size=args.num_labels,
        num_flatten_dims=2,
        param_attr=weight_attr,
        bias_attr=bias_attr)

    # Column vectors of gold labels and arg-max predictions for evaluation.
    ret_labels = fluid.layers.reshape(x=labels, shape=[-1, 1])
    best_label = fluid.layers.argmax(logits, axis=2)
    ret_infers = fluid.layers.reshape(x=best_label, shape=[-1, 1])

    labels = fluid.layers.flatten(labels, axis=2)
    flat_logits = fluid.layers.flatten(logits, axis=2)
    ce_loss, probs = fluid.layers.softmax_with_cross_entropy(
        logits=flat_logits, label=labels, return_softmax=True)
    loss = fluid.layers.mean(x=ce_loss)

    if args.use_fp16 and args.loss_scaling > 1.0:
        loss *= args.loss_scaling

    graph_vars = {
        "loss": loss,
        "probs": probs,
        "labels": ret_labels,
        "infers": ret_infers,
        "seq_lens": seq_lens
    }
    for var in graph_vars.values():
        var.persistable = True

    return pyreader, graph_vars
def forward(self, features):
    """Run ERNIE over a batch and project every token to label logits.

    Args:
        features: a 3-tuple ``(src_ids, sent_ids, input_seqlen)``.
            Assumes ``src_ids`` is ``[batch, seq_len, 1]`` with pad id 0 --
            TODO confirm against the data pipeline.

    Returns:
        ``(logits, input_seqlen)`` where ``logits`` is the output of the
        token-level fully connected layer of width ``self.num_label``.
    """
    src_ids, sent_ids, input_seqlen = features

    # The mask must be 1.0 on real tokens and 0.0 on padding (pad id == 0).
    # A bare ``equal(src_ids, 0)`` marks the padding instead, so negate it.
    zero = L.fill_constant([1], dtype='int64', value=0)
    is_pad = L.equal(src_ids, zero)
    input_mask = L.cast(L.logical_not(is_pad), 'float32')

    # Position ids 0..seq_len-1, tiled over the batch and given a trailing
    # unit axis.
    d_shape = L.shape(src_ids)
    seqlen = d_shape[1]
    batch_size = d_shape[0]
    pos_ids = L.unsqueeze(L.range(0, seqlen, 1, dtype='int32'), axes=[0])
    pos_ids = L.expand(pos_ids, [batch_size, 1])
    pos_ids = L.unsqueeze(pos_ids, axes=[2])
    pos_ids = L.cast(pos_ids, 'int64')
    pos_ids.stop_gradient = True
    input_mask.stop_gradient = True

    # Same task id for every token (reportedly not used by the model for now).
    task_ids = L.zeros_like(src_ids) + self.hparam.task_id
    task_ids.stop_gradient = True

    model = ErnieModel(
        src_ids=src_ids,
        position_ids=pos_ids,
        sentence_ids=sent_ids,
        task_ids=task_ids,
        input_mask=input_mask,
        config=self.hparam,
        use_fp16=self.hparam['use_fp16'])

    enc_out = model.get_sequence_output()
    logits = L.fc(
        input=enc_out,
        size=self.num_label,
        num_flatten_dims=2,
        param_attr=F.ParamAttr(
            name="cls_seq_label_out_w",
            initializer=F.initializer.TruncatedNormal(scale=0.02)),
        bias_attr=F.ParamAttr(
            name="cls_seq_label_out_b",
            initializer=F.initializer.Constant(0.)))

    propeller.summary.histogram('pred', logits)

    return logits, input_seqlen
def create_model(args, pyreader_name, ernie_config, is_prediction=False):
    """Build a token-classification graph fed through a ``DataLoader``.

    Args:
        args: object providing ``max_seq_len``, ``num_labels``,
            ``chunk_scheme`` and ``use_fp16``.
        pyreader_name: accepted for interface compatibility; not read here.
        ernie_config: configuration forwarded to ``ErnieModel``.
        is_prediction: accepted for interface compatibility; not read here.

    Returns:
        ``(pyreader, graph_vars)``. ``pyreader`` is the non-iterable
        ``DataLoader``; ``graph_vars`` has keys ``"inputs"``, ``"loss"``,
        ``"probs"``, ``"seqlen"``, ``"num_infer"``, ``"num_label"`` and
        ``"num_correct"``, all marked persistable.
    """
    token_shape = [-1, args.max_seq_len, 1]

    def _token_feed(feed_name, feed_dtype):
        # Each call gets its own shape list; feed names are fixed strings.
        return fluid.layers.data(
            name=feed_name, shape=list(token_shape), dtype=feed_dtype)

    src_ids = _token_feed('1', 'int64')
    sent_ids = _token_feed('2', 'int64')
    pos_ids = _token_feed('3', 'int64')
    task_ids = _token_feed('4', 'int64')
    input_mask = _token_feed('5', 'float32')
    labels = _token_feed('7', 'int64')
    seq_lens = fluid.layers.data(name='8', shape=[-1], dtype='int64')

    feed_list = [
        src_ids, sent_ids, pos_ids, task_ids, input_mask, labels, seq_lens
    ]
    pyreader = fluid.io.DataLoader.from_generator(
        feed_list=feed_list, capacity=70, iterable=False)

    ernie = ErnieModel(
        src_ids=src_ids,
        position_ids=pos_ids,
        sentence_ids=sent_ids,
        task_ids=task_ids,
        input_mask=input_mask,
        config=ernie_config,
        use_fp16=args.use_fp16)

    enc_out = ernie.get_sequence_output()
    enc_out = fluid.layers.dropout(
        x=enc_out,
        dropout_prob=0.1,
        dropout_implementation="upscale_in_train")

    weight_attr = fluid.ParamAttr(
        name="cls_seq_label_out_w",
        initializer=fluid.initializer.TruncatedNormal(scale=0.02))
    bias_attr = fluid.ParamAttr(
        name="cls_seq_label_out_b",
        initializer=fluid.initializer.Constant(0.))
    logits = fluid.layers.fc(
        input=enc_out,
        size=args.num_labels,
        num_flatten_dims=2,
        param_attr=weight_attr,
        bias_attr=bias_attr)

    infers = fluid.layers.argmax(logits, axis=2)
    # Not returned, but the op is kept so the built program is unchanged.
    ret_infers = fluid.layers.reshape(x=infers, shape=[-1, 1])

    # Strip padding before chunk-level evaluation.
    lod_labels = fluid.layers.sequence_unpad(labels, seq_lens)
    lod_infers = fluid.layers.sequence_unpad(infers, seq_lens)

    num_chunk_types = (args.num_labels - 1) // (len(args.chunk_scheme) - 1)
    chunk_stats = fluid.layers.chunk_eval(
        input=lod_infers,
        label=lod_labels,
        chunk_scheme=args.chunk_scheme,
        num_chunk_types=num_chunk_types)
    (_, _, _, num_infer, num_label, num_correct) = chunk_stats

    labels = fluid.layers.flatten(labels, axis=2)
    flat_logits = fluid.layers.flatten(logits, axis=2)
    ce_loss, probs = fluid.layers.softmax_with_cross_entropy(
        logits=flat_logits, label=labels, return_softmax=True)

    # Scale the per-token loss by the input mask before averaging.
    input_mask = fluid.layers.flatten(input_mask, axis=2)
    ce_loss = ce_loss * input_mask
    loss = fluid.layers.mean(x=ce_loss)

    graph_vars = {
        "inputs": src_ids,
        "loss": loss,
        "probs": probs,
        "seqlen": seq_lens,
        "num_infer": num_infer,
        "num_label": num_label,
        "num_correct": num_correct,
    }
    for var in graph_vars.values():
        var.persistable = True

    return pyreader, graph_vars
def create_model(args, pyreader_name, ernie_config, is_training):
    """Build a span-extraction (start/end position) graph on top of ERNIE.

    Args:
        args: object providing ``max_seq_len``, ``use_fp16`` and
            ``loss_scaling``.
        pyreader_name: name given to the ``py_reader``.
        ernie_config: configuration forwarded to ``ErnieModel``.
        is_training: accepted for interface compatibility; not read here.

    Returns:
        ``(pyreader, graph_vars)`` with keys ``"loss"``, ``"num_seqs"``,
        ``"unique_id"``, ``"start_logits"`` and ``"end_logits"``; every entry
        is marked persistable.
    """
    seq_len = args.max_seq_len
    # Five per-token slots, then start position, end position and unique id.
    slot_shapes = [[-1, seq_len, 1] for _ in range(5)]
    slot_shapes += [[-1, 1] for _ in range(3)]

    pyreader = fluid.layers.py_reader(
        capacity=50,
        shapes=slot_shapes,
        dtypes=[
            'int64', 'int64', 'int64', 'int64', 'float32', 'int64', 'int64',
            'int64'
        ],
        lod_levels=[0, 0, 0, 0, 0, 0, 0, 0],
        name=pyreader_name,
        use_double_buffer=True)

    fed = fluid.layers.read_file(pyreader)
    (src_ids, sent_ids, pos_ids, task_ids, input_mask, start_positions,
     end_positions, unique_id) = fed

    ernie = ErnieModel(
        src_ids=src_ids,
        position_ids=pos_ids,
        sentence_ids=sent_ids,
        task_ids=task_ids,
        input_mask=input_mask,
        config=ernie_config,
        use_fp16=args.use_fp16)

    enc_out = ernie.get_sequence_output()
    enc_out = fluid.layers.dropout(
        x=enc_out,
        dropout_prob=0.1,
        dropout_implementation="upscale_in_train")

    weight_attr = fluid.ParamAttr(
        name="cls_mrc_out_w",
        initializer=fluid.initializer.TruncatedNormal(scale=0.02))
    bias_attr = fluid.ParamAttr(
        name="cls_mrc_out_b",
        initializer=fluid.initializer.Constant(0.))
    logits = fluid.layers.fc(
        input=enc_out,
        size=2,
        num_flatten_dims=2,
        param_attr=weight_attr,
        bias_attr=bias_attr)

    # Move the size-2 axis to the front and split it into start/end logits.
    logits = fluid.layers.transpose(x=logits, perm=[2, 0, 1])
    start_logits, end_logits = fluid.layers.unstack(x=logits, axis=0)

    # Count the examples in the batch by summing one constant per row.
    batch_ones = fluid.layers.fill_constant_batch_size_like(
        input=start_logits, dtype='int64', shape=[1], value=1)
    num_seqs = fluid.layers.reduce_sum(input=batch_ones)

    def _mean_position_loss(position_logits, gold_positions):
        """Mean softmax cross entropy of one boundary."""
        per_example = fluid.layers.softmax_with_cross_entropy(
            logits=position_logits, label=gold_positions)
        return fluid.layers.mean(x=per_example)

    start_loss = _mean_position_loss(start_logits, start_positions)
    end_loss = _mean_position_loss(end_logits, end_positions)
    loss = (start_loss + end_loss) / 2.0

    if args.use_fp16 and args.loss_scaling > 1.0:
        loss *= args.loss_scaling

    graph_vars = {
        "loss": loss,
        "num_seqs": num_seqs,
        "unique_id": unique_id,
        "start_logits": start_logits,
        "end_logits": end_logits
    }
    for var in graph_vars.values():
        var.persistable = True

    return pyreader, graph_vars
def create_model(ernie_config, is_training=False):
    """Build the SQuAD-style span head on ERNIE for training or inference.

    Note: ``args.use_fp16`` is read from a name ``args`` that is not a
    parameter of this function; it must exist in the enclosing scope.

    Args:
        ernie_config: configuration forwarded to ``ErnieModel``.
        is_training: when true the graph takes gold start/end positions and
            produces a loss; otherwise it takes a ``unique_id`` input.

    Returns:
        Training: ``(data_loader, total_loss, num_seqs)``.
        Inference: ``(data_loader, unique_id, start_logits, end_logits,
        num_seqs)``.
    """
    # (name, shape, dtype) of the inputs common to both modes.
    field_specs = [
        ('src_ids', [None, None], 'int64'),
        ('pos_ids', [None, None], 'int64'),
        ('sent_ids', [None, None], 'int64'),
        ('input_mask', [None, None, 1], 'float32'),
    ]
    if is_training:
        field_specs += [
            ('start_positions', [None, 1], 'int64'),
            ('end_positions', [None, 1], 'int64'),
        ]
    else:
        field_specs.append(('unique_id', [None, 1], 'int64'))

    inputs = [
        fluid.data(name=field_name, shape=field_shape, dtype=field_dtype,
                   lod_level=0)
        for field_name, field_shape, field_dtype in field_specs
    ]

    data_loader = fluid.io.DataLoader.from_generator(
        feed_list=inputs, capacity=50, iterable=False)

    src_ids, pos_ids, sent_ids, input_mask = inputs[:4]
    if is_training:
        start_positions, end_positions = inputs[4:]
    else:
        (unique_id,) = inputs[4:]

    ernie = ErnieModel(
        src_ids=src_ids,
        position_ids=pos_ids,
        sentence_ids=sent_ids,
        input_mask=input_mask,
        config=ernie_config,
        use_fp16=args.use_fp16)

    enc_out = ernie.get_sequence_output()

    weight_attr = fluid.ParamAttr(
        name="cls_squad_out_w",
        initializer=fluid.initializer.TruncatedNormal(scale=0.02))
    bias_attr = fluid.ParamAttr(
        name="cls_squad_out_b",
        initializer=fluid.initializer.Constant(0.))
    logits = fluid.layers.fc(
        input=enc_out,
        size=2,
        num_flatten_dims=2,
        param_attr=weight_attr,
        bias_attr=bias_attr)

    # Move the size-2 axis to the front and split it into start/end logits.
    logits = fluid.layers.transpose(x=logits, perm=[2, 0, 1])
    start_logits, end_logits = fluid.layers.unstack(x=logits, axis=0)

    # Count the examples in the batch by summing one constant per row.
    batch_ones = fluid.layers.fill_constant_batch_size_like(
        input=start_logits, dtype='int64', shape=[1], value=1)
    num_seqs = fluid.layers.reduce_sum(input=batch_ones)

    if not is_training:
        return data_loader, unique_id, start_logits, end_logits, num_seqs

    def _mean_position_loss(position_logits, gold_positions):
        """Mean softmax cross entropy of one boundary."""
        per_example = fluid.layers.softmax_with_cross_entropy(
            logits=position_logits, label=gold_positions)
        return fluid.layers.mean(x=per_example)

    start_loss = _mean_position_loss(start_logits, start_positions)
    end_loss = _mean_position_loss(end_logits, end_positions)
    total_loss = (start_loss + end_loss) / 2.0
    return data_loader, total_loss, num_seqs
def create_model(args, pyreader_name, ernie_config, is_prediction=False):
    """Build a token-classification graph with chunk-level evaluation.

    Args:
        args: object providing ``max_seq_len``, ``num_labels``,
            ``chunk_scheme``, ``use_fp16`` and ``loss_scaling``.
        pyreader_name: name given to the ``py_reader``.
        ernie_config: configuration forwarded to ``ErnieModel``.
        is_prediction: accepted for interface compatibility; not read here.

    Returns:
        ``(pyreader, graph_vars)`` with keys ``"loss"``, ``"probs"``,
        ``"labels"``, ``"infers"``, ``"num_infer"``, ``"num_label"``,
        ``"num_correct"`` and ``"seq_lens"``; every entry is marked
        persistable.
    """
    seq_len = args.max_seq_len
    # Six per-token slots followed by one per-example length slot.
    slot_shapes = [[-1, seq_len, 1] for _ in range(6)] + [[-1, 1]]

    pyreader = fluid.layers.py_reader(
        capacity=50,
        shapes=slot_shapes,
        dtypes=[
            'int64', 'int64', 'int64', 'int64', 'float32', 'int64', 'int64'
        ],
        lod_levels=[0, 0, 0, 0, 0, 0, 0],
        name=pyreader_name,
        use_double_buffer=True)

    fed = fluid.layers.read_file(pyreader)
    (src_ids, sent_ids, pos_ids, task_ids, input_mask, labels,
     seq_lens) = fed

    ernie = ErnieModel(
        src_ids=src_ids,
        position_ids=pos_ids,
        sentence_ids=sent_ids,
        task_ids=task_ids,
        input_mask=input_mask,
        config=ernie_config,
        use_fp16=args.use_fp16)

    enc_out = ernie.get_sequence_output()
    enc_out = fluid.layers.dropout(
        x=enc_out,
        dropout_prob=0.1,
        dropout_implementation="upscale_in_train")

    weight_attr = fluid.ParamAttr(
        name="cls_seq_label_out_w",
        initializer=fluid.initializer.TruncatedNormal(scale=0.02))
    bias_attr = fluid.ParamAttr(
        name="cls_seq_label_out_b",
        initializer=fluid.initializer.Constant(0.))
    logits = fluid.layers.fc(
        input=enc_out,
        size=args.num_labels,
        num_flatten_dims=2,
        param_attr=weight_attr,
        bias_attr=bias_attr)

    infers = fluid.layers.argmax(logits, axis=2)

    # Column vectors of gold labels and predictions for the caller.
    ret_labels = fluid.layers.reshape(x=labels, shape=[-1, 1])
    ret_infers = fluid.layers.reshape(x=infers, shape=[-1, 1])

    # Strip padding before chunk-level evaluation.
    lod_labels = fluid.layers.sequence_unpad(labels, seq_lens)
    lod_infers = fluid.layers.sequence_unpad(infers, seq_lens)

    num_chunk_types = (args.num_labels - 1) // (len(args.chunk_scheme) - 1)
    chunk_stats = fluid.layers.chunk_eval(
        input=lod_infers,
        label=lod_labels,
        chunk_scheme=args.chunk_scheme,
        num_chunk_types=num_chunk_types)
    (_, _, _, num_infer, num_label, num_correct) = chunk_stats

    labels = fluid.layers.flatten(labels, axis=2)
    flat_logits = fluid.layers.flatten(logits, axis=2)
    ce_loss, probs = fluid.layers.softmax_with_cross_entropy(
        logits=flat_logits, label=labels, return_softmax=True)

    # Scale the per-token loss by the input mask before averaging.
    input_mask = fluid.layers.flatten(input_mask, axis=2)
    ce_loss = ce_loss * input_mask
    loss = fluid.layers.mean(x=ce_loss)

    if args.use_fp16 and args.loss_scaling > 1.0:
        loss *= args.loss_scaling

    graph_vars = {
        "loss": loss,
        "probs": probs,
        "labels": ret_labels,
        "infers": ret_infers,
        "num_infer": num_infer,
        "num_label": num_label,
        "num_correct": num_correct,
        "seq_lens": seq_lens
    }
    for var in graph_vars.values():
        var.persistable = True

    return pyreader, graph_vars
def create_model(args, pyreader_name, ernie_config):
    """Build a per-token multi-label (sigmoid) tagging graph on ERNIE.

    Args:
        args: object providing ``max_seq_len``, ``num_labels`` and
            ``use_fp16``.
        pyreader_name: name given to the ``py_reader``.
        ernie_config: configuration forwarded to ``ErnieModel``.

    Returns:
        ``(pyreader, graph_vars)`` with keys ``"inputs"``, ``"loss"``,
        ``"seqlen"``, ``"lod_logit"``, ``"lod_label"``, ``"example_index"``,
        ``"tok_to_orig_start_index"`` and ``"tok_to_orig_end_index"``; every
        entry is marked persistable.
    """
    seq_len = args.max_seq_len
    token_slot = [-1, seq_len, 1]
    slot_shapes = [
        list(token_slot),                 # src_ids
        list(token_slot),                 # sent_ids
        list(token_slot),                 # pos_ids
        list(token_slot),                 # task_ids
        list(token_slot),                 # input_mask
        [-1, seq_len, args.num_labels],   # labels
        [-1, 1],                          # seq_lens
        [-1, 1],                          # example_index
        list(token_slot),                 # tok_to_orig_start_index
        list(token_slot),                 # tok_to_orig_end_index
    ]

    pyreader = fluid.layers.py_reader(
        capacity=50,  # number of items the reader buffer can hold
        shapes=slot_shapes,
        dtypes=[
            'int64', 'int64', 'int64', 'int64', 'float32', 'float32',
            'int64', 'int64', 'int64', 'int64'
        ],
        lod_levels=[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        name=pyreader_name,
        use_double_buffer=True)

    fed = fluid.layers.read_file(pyreader)
    (src_ids, sent_ids, pos_ids, task_ids, input_mask, labels, seq_lens,
     example_index, tok_to_orig_start_index, tok_to_orig_end_index) = fed

    # Embedding + encoder.
    ernie = ErnieModel(
        src_ids=src_ids,
        position_ids=pos_ids,
        sentence_ids=sent_ids,
        task_ids=task_ids,
        input_mask=input_mask,
        config=ernie_config,
        use_fp16=args.use_fp16)

    # Output of the encoder for every token.
    enc_out = ernie.get_sequence_output()
    enc_out = fluid.layers.dropout(
        x=enc_out,
        dropout_prob=0.1,
        dropout_implementation="upscale_in_train")

    weight_attr = fluid.ParamAttr(
        name="cls_seq_label_out_w",
        initializer=fluid.initializer.TruncatedNormal(scale=0.02))
    bias_attr = fluid.ParamAttr(
        name="cls_seq_label_out_b",
        initializer=fluid.initializer.Constant(0.))
    logits = fluid.layers.fc(
        input=enc_out,
        size=args.num_labels,
        num_flatten_dims=2,
        param_attr=weight_attr,
        bias_attr=bias_attr)
    # From here on ``logits`` holds sigmoid outputs, one per label.
    logits = fluid.layers.sigmoid(logits)

    # Unpadded views returned to the caller.
    lod_labels = fluid.layers.sequence_unpad(labels, seq_lens)
    lod_logits = fluid.layers.sequence_unpad(logits, seq_lens)
    lod_tok_to_orig_start_index = fluid.layers.sequence_unpad(
        tok_to_orig_start_index, seq_lens)
    lod_tok_to_orig_end_index = fluid.layers.sequence_unpad(
        tok_to_orig_end_index, seq_lens)

    labels = fluid.layers.flatten(labels, axis=2)
    logits = fluid.layers.flatten(logits, axis=2)
    input_mask = fluid.layers.flatten(input_mask, axis=2)

    # Binary cross entropy written out by hand, averaged over the label
    # axis, scaled by the input mask, then averaged over everything.
    log_logits = fluid.layers.log(logits)
    log_logits_neg = fluid.layers.log(1 - logits)
    ce_loss = 0. - labels * log_logits - (1 - labels) * log_logits_neg
    ce_loss = fluid.layers.reduce_mean(ce_loss, dim=1, keep_dim=True)
    ce_loss = ce_loss * input_mask
    loss = fluid.layers.mean(x=ce_loss)

    graph_vars = {
        "inputs": src_ids,
        "loss": loss,
        "seqlen": seq_lens,
        "lod_logit": lod_logits,
        "lod_label": lod_labels,
        "example_index": example_index,
        "tok_to_orig_start_index": lod_tok_to_orig_start_index,
        "tok_to_orig_end_index": lod_tok_to_orig_end_index
    }
    for var in graph_vars.values():
        var.persistable = True

    return pyreader, graph_vars
def create_model(args, pyreader_name, ernie_config, is_prediction=False):
    """Build a sequence-labeling graph with a linear-chain CRF head on ERNIE.

    Args:
        args: object providing ``max_seq_len``, ``num_labels``,
            ``chunk_scheme``, ``crf_learning_rate`` and ``use_fp16``.
        pyreader_name: name given to the ``py_reader``.
        ernie_config: configuration forwarded to ``ErnieModel``.
        is_prediction: accepted for interface compatibility; not read here.

    Returns:
        ``(pyreader, graph_vars)`` with keys ``"inputs"``, ``"loss"``,
        ``"seqlen"``, ``"crf_decode"``, ``"num_infer"``, ``"num_label"`` and
        ``"num_correct"``; every entry is marked persistable.
    """
    seq_len = args.max_seq_len
    pyreader = fluid.layers.py_reader(
        capacity=50,
        shapes=[[-1, seq_len, 1], [-1, seq_len, 1], [-1, seq_len, 1],
                [-1, seq_len, 1], [-1, seq_len, 1], [-1, seq_len, 1],
                [-1]],
        dtypes=[
            'int64', 'int64', 'int64', 'int64', 'float32', 'int64', 'int64'
        ],
        lod_levels=[0, 0, 0, 0, 0, 0, 0],
        name=pyreader_name,
        use_double_buffer=True)

    (src_ids, sent_ids, pos_ids, task_ids, input_mask, labels,
     seq_lens) = fluid.layers.read_file(pyreader)

    ernie = ErnieModel(
        src_ids=src_ids,
        position_ids=pos_ids,
        sentence_ids=sent_ids,
        task_ids=task_ids,
        input_mask=input_mask,
        config=ernie_config,
        use_fp16=args.use_fp16)

    enc_out = ernie.get_sequence_output()

    # Per-token emission scores consumed by the CRF.
    emission = fluid.layers.fc(
        input=enc_out,
        size=args.num_labels,
        param_attr=fluid.ParamAttr(
            initializer=fluid.initializer.Uniform(low=-0.1, high=0.1),
            regularizer=fluid.regularizer.L2DecayRegularizer(
                regularization_coeff=1e-4)),
        num_flatten_dims=2)

    # Training cost and decoding both use the parameter named 'crfw'.
    crf_cost = fluid.layers.linear_chain_crf(
        input=emission,
        label=labels,
        param_attr=fluid.ParamAttr(
            name='crfw', learning_rate=args.crf_learning_rate),
        length=seq_lens)
    loss = fluid.layers.mean(x=crf_cost)

    crf_decode = fluid.layers.crf_decoding(
        input=emission,
        param_attr=fluid.ParamAttr(name='crfw'),
        length=seq_lens)

    # Drop the trailing unit axis of the labels for chunk evaluation.
    lod_labels = fluid.layers.squeeze(labels, axes=[-1])

    # Number of chunk types implied by the label count and tagging scheme
    # (e.g. IOB).
    num_chunk_types = (
        (args.num_labels - 1) // (len(args.chunk_scheme) - 1))

    (_, _, _, num_infer, num_label,
     num_correct) = fluid.layers.chunk_eval(
         input=crf_decode,
         label=lod_labels,
         chunk_scheme=args.chunk_scheme,
         num_chunk_types=num_chunk_types,
         seq_length=seq_lens)

    graph_vars = {
        "inputs": src_ids,
        "loss": loss,
        "seqlen": seq_lens,
        "crf_decode": crf_decode,
        "num_infer": num_infer,
        "num_label": num_label,
        "num_correct": num_correct,
    }
    for var in graph_vars.values():
        var.persistable = True

    return pyreader, graph_vars
def create_model(self, decoding=False):
    """Build the training graph, or delegate to the decoding graph.

    Args:
        decoding: when true, return ``self.infilling_decode()`` instead of
            building the training graph.

    Returns:
        ``(pyreader, graph_vars)`` where ``graph_vars`` is ``{"loss": loss}``
        with the loss marked persistable (training path), or whatever
        ``self.infilling_decode()`` returns (decoding path).
    """
    if decoding:
        return self.infilling_decode()

    # Dialog tasks carry one more embedding input than the other task types.
    emb_num = 4 if self.task_type == "dialog" else 3

    # Two input streams, each with ``emb_num`` id tensors and one mask; the
    # mask of the second (query) stream is twice as wide.
    input_shapes = [[-1, self.max_seq_len, 1]] * emb_num + \
        [[-1, self.max_seq_len, self.max_seq_len]]
    query_input_shapes = [[-1, self.max_seq_len, 1]] * emb_num + \
        [[-1, self.max_seq_len, self.max_seq_len * 2]]
    stream_dtypes = ['int64'] * emb_num + ['float32']
    stream_lod_levels = [0] * emb_num + [0]

    # Both streams, then target labels and target positions.
    shapes = input_shapes + query_input_shapes + [[-1, 1], [-1, 1]]
    dtypes = stream_dtypes * 2 + ['int64', 'int64']
    lod_levels = stream_lod_levels * 2 + [0, 0]

    inputs = self.to_ternsor(shapes, dtypes, lod_levels)
    pyreader = fluid.io.DataLoader.from_generator(
        feed_list=inputs, capacity=50, iterable=False)

    # Layout of ``inputs``:
    #   [0, emb_num)                   ids of the first stream
    #   emb_num                        mask of the first stream
    #   [emb_num + 1, 2 * emb_num + 1) ids of the query stream
    #   2 * emb_num + 1                mask of the query stream
    #   last two                       target labels, target positions
    first_ids = inputs[:emb_num]
    query_ids = inputs[emb_num + 1:emb_num * 2 + 1]
    emb_ids = [
        dict(zip(self.emb_keys, first_ids)),
        dict(zip(self.emb_keys, query_ids)),
    ]
    input_mask = inputs[emb_num]
    input_query_mask = inputs[2 * emb_num + 1]
    tgt_labels, tgt_pos = inputs[-2:]

    ernie = ErnieModel(
        emb_ids=emb_ids,
        input_mask=[input_mask, input_query_mask],
        config=self.ernie_config,
        use_fp16=self.use_fp16,
        task_type=self.task_type)

    enc_out = ernie.get_sequence_output()
    fc_out = self.cal_logit(enc_out, tgt_pos)

    if not self.label_smooth:
        ce_loss, probs = fluid.layers.softmax_with_cross_entropy(
            logits=fc_out, label=tgt_labels, return_softmax=True)
    else:
        # Smooth one-hot targets and train against the soft distribution.
        out_size = self.ernie_config[
            "tgt_vocab_size"] or self.ernie_config['vocab_size']
        one_hot_labels = fluid.layers.one_hot(
            input=tgt_labels, depth=out_size)
        labels = fluid.layers.label_smooth(
            label=one_hot_labels, epsilon=self.label_smooth)
        ce_loss = layers.softmax_with_cross_entropy(
            logits=fc_out, label=labels, soft_label=True)

    loss = fluid.layers.mean(x=ce_loss)

    graph_vars = {"loss": loss}
    for var in graph_vars.values():
        var.persistable = True

    return pyreader, graph_vars
def create_model(args,
                 pyreader_name,
                 ernie_config,
                 is_prediction=False,
                 task_name="",
                 is_classify=False,
                 is_regression=False,
                 ernie_version="1.0"):
    """Build an ERNIE + GAT sentence-level classifier or regressor.

    The ERNIE sequence output and an adjacency matrix are passed through a
    GAT; the GAT rows selected by ``head_ids`` are concatenated with the
    pooled ERNIE output and projected to ``args.num_labels``.

    Args:
        args: object providing ``max_seq_len``, ``num_labels`` and
            ``use_fp16``.
        pyreader_name: suffix of the ``py_reader`` name.
        ernie_config: configuration forwarded to ``ErnieModel``.
        is_prediction: when true, return softmax probabilities and the feed
            target names instead of the training graph variables.
        task_name: prefix for the reader name and output-layer parameters.
        is_classify: build the classification variant (``int64`` labels).
        is_regression: build the regression variant (``float32`` labels).
        ernie_version: ``"2.0"`` adds ``task_ids`` to the feed target names.

    Returns:
        Prediction: ``(pyreader, probs, feed_targets_name)``.
        Otherwise: ``(pyreader, graph_vars)`` with keys ``"loss"``,
        ``"probs"``, ``"labels"``, ``"num_seqs"``, ``"qids"`` and, for
        classification, ``"accuracy"``.

    Raises:
        ValueError: if neither ``is_classify`` nor ``is_regression`` is set.
        AssertionError: if both are set and ``is_prediction`` is false.
    """
    # Fail early with a clear message; otherwise no reader is created and the
    # first use of it dies with an UnboundLocalError.
    if not (is_classify or is_regression):
        raise ValueError(
            'unsupported fine tune mode. only supported classify/regression')

    seq_len = args.max_seq_len
    # Classification takes precedence when both flags are set.
    label_dtype = 'int64' if is_classify else 'float32'

    # Both modes share one reader layout, including the adjacency matrix and
    # the head-word indices, because the graph below uses them in both modes.
    # NOTE(review): the regression reader used to declare only seven slots,
    # so the nine-way unpack below always failed in regression mode. Confirm
    # that the regression data generator emits adj_mat and head_ids.
    pyreader = fluid.layers.py_reader(
        capacity=50,
        shapes=[
            [-1, seq_len, 1],        # src_ids
            [-1, seq_len, 1],        # sent_ids
            [-1, seq_len, 1],        # pos_ids
            [-1, seq_len, 1],        # task_ids
            [-1, seq_len, 1],        # input_mask
            [-1, 1],                 # labels
            [-1, 1],                 # qids
            [-1, seq_len, seq_len],  # adj_mat
            [-1, 2],                 # head_ids
        ],
        dtypes=[
            'int64', 'int64', 'int64', 'int64', 'float32', label_dtype,
            'int64', 'int64', 'int64'
        ],
        lod_levels=[0, 0, 0, 0, 0, 0, 0, 0, 0],
        name=task_name + "_" + pyreader_name,
        use_double_buffer=True)

    (src_ids, sent_ids, pos_ids, task_ids, input_mask, labels, qids, adj_mat,
     head_ids) = fluid.layers.read_file(pyreader)

    ernie = ErnieModel(
        src_ids=src_ids,
        position_ids=pos_ids,
        sentence_ids=sent_ids,
        task_ids=task_ids,
        input_mask=input_mask,
        config=ernie_config,
        use_fp16=args.use_fp16)

    erinie_output = ernie.get_sequence_output()
    cls_feats = ernie.get_pooled_output()

    # GAT on top of ERNIE.
    # NOTE(review): input_size=768 presumably matches the encoder hidden
    # size -- confirm against ernie_config.
    gat = gnn.GAT(
        input_size=768,
        hidden_size=100,
        output_size=50,
        dropout=0.0,
        alpha=0.1,
        heads=12,
        layer=2)
    # Feed the ERNIE token representations and the adjacency matrix to the
    # GAT to get representations that include sentence structure.
    gat_emb = gat.forward(erinie_output, adj_mat)
    # Pick out the representations of the head words.
    gat_emb = utils.index_sample(gat_emb, head_ids)

    # Concatenate the [CLS] and head-word representations for the output
    # layer.
    cls_feats = fluid.layers.concat([cls_feats, gat_emb], axis=1)

    cls_feats = fluid.layers.dropout(
        x=cls_feats,
        dropout_prob=0.1,
        dropout_implementation="upscale_in_train")
    logits = fluid.layers.fc(
        input=cls_feats,
        size=args.num_labels,
        param_attr=fluid.ParamAttr(
            name=task_name + "_cls_out_w",
            initializer=fluid.initializer.TruncatedNormal(scale=0.02)),
        bias_attr=fluid.ParamAttr(
            name=task_name + "_cls_out_b",
            initializer=fluid.initializer.Constant(0.)))

    if is_prediction:
        probs = fluid.layers.softmax(logits)
        # NOTE(review): adj_mat and head_ids feed the graph but are not
        # listed as feed targets; left as-is to keep the return contract.
        feed_targets_name = [
            src_ids.name, sent_ids.name, pos_ids.name, input_mask.name
        ]
        if ernie_version == "2.0":
            feed_targets_name += [task_ids.name]
        return pyreader, probs, feed_targets_name

    assert is_classify != is_regression, 'is_classify or is_regression must be true and only one of them can be true'

    num_seqs = fluid.layers.create_tensor(dtype='int64')
    if is_classify:
        ce_loss, probs = fluid.layers.softmax_with_cross_entropy(
            logits=logits, label=labels, return_softmax=True)
        loss = fluid.layers.mean(x=ce_loss)
        accuracy = fluid.layers.accuracy(
            input=probs, label=labels, total=num_seqs)
        graph_vars = {
            "loss": loss,
            "probs": probs,
            "accuracy": accuracy,
            "labels": labels,
            "num_seqs": num_seqs,
            "qids": qids
        }
    else:
        # Regression: the raw logits are reported under "probs".
        cost = fluid.layers.square_error_cost(input=logits, label=labels)
        loss = fluid.layers.mean(x=cost)
        graph_vars = {
            "loss": loss,
            "probs": logits,
            "labels": labels,
            "num_seqs": num_seqs,
            "qids": qids
        }

    return pyreader, graph_vars