def create_model(args, pyreader_name, bert_config, num_labels, is_prediction=False):
    """Build a BERT sequence-classification graph fed by a py_reader.

    This variant feeds an explicit self-attention mask of shape
    [batch, max_seq_len, max_seq_len] and a next_sent_index used to gather
    the pooled output.

    Args:
        args: config namespace; reads max_seq_len, use_fp16, loss_scaling.
        pyreader_name: unique name for the py_reader op.
        bert_config: BertModel configuration object.
        num_labels: width of the classification output layer.
        is_prediction: when True, return softmax probabilities and the feed
            target names instead of the training outputs.

    Returns:
        (pyreader, probs, feed_targets_name) when is_prediction, otherwise
        (pyreader, loss, probs, accuracy, num_seqs).
    """
    pyreader = fluid.layers.py_reader(
        capacity=50,
        shapes=[[-1, args.max_seq_len, 1], [-1, args.max_seq_len, 1],
                [-1, args.max_seq_len, 1],
                [-1, args.max_seq_len, args.max_seq_len], [-1, 1], [-1, 1]],
        # BUGFIX: the mask dtype was 'float', which numpy resolves to
        # float64; every sibling create_model variant in this file declares
        # the mask as 'float32', so use that here as well.
        dtypes=['int64', 'int64', 'int64', 'float32', 'int64', 'int64'],
        lod_levels=[0, 0, 0, 0, 0, 0],
        name=pyreader_name,
        use_double_buffer=True)

    (src_ids, pos_ids, sent_ids, self_attn_mask, labels,
     next_sent_index) = fluid.layers.read_file(pyreader)

    bert = BertModel(
        src_ids=src_ids,
        position_ids=pos_ids,
        sentence_ids=sent_ids,
        self_attn_mask=self_attn_mask,
        config=bert_config,
        use_fp16=args.use_fp16)

    # Pooled features gathered at next_sent_index, then regularized.
    cls_feats = bert.get_pooled_output(next_sent_index)
    cls_feats = fluid.layers.dropout(
        x=cls_feats,
        dropout_prob=0.1,
        dropout_implementation="upscale_in_train")
    logits = fluid.layers.fc(
        input=cls_feats,
        size=num_labels,
        param_attr=fluid.ParamAttr(
            name="cls_out_w",
            initializer=fluid.initializer.TruncatedNormal(scale=0.02)),
        bias_attr=fluid.ParamAttr(
            name="cls_out_b", initializer=fluid.initializer.Constant(0.)))

    if is_prediction:
        probs = fluid.layers.softmax(logits)
        feed_targets_name = [
            src_ids.name, pos_ids.name, sent_ids.name, self_attn_mask.name,
            next_sent_index.name
        ]
        return pyreader, probs, feed_targets_name

    ce_loss, probs = fluid.layers.softmax_with_cross_entropy(
        logits=logits, label=labels, return_softmax=True)
    loss = fluid.layers.mean(x=ce_loss)
    # Static loss scaling for fp16 training.
    if args.use_fp16 and args.loss_scaling > 1.0:
        loss *= args.loss_scaling

    num_seqs = fluid.layers.create_tensor(dtype='int64')
    accuracy = fluid.layers.accuracy(input=probs, label=labels, total=num_seqs)

    return pyreader, loss, probs, accuracy, num_seqs
def create_model(args, bert_config, num_labels, is_prediction=False):
    """Build a BERT classification graph fed through fluid.io.PyReader.

    Args:
        args: config namespace; reads max_seq_len.
        bert_config: BertModel configuration object.
        num_labels: width of the classification output layer.
        is_prediction: when True, return softmax probabilities and the feed
            target names instead of the training outputs.

    Returns:
        (pyreader, probs, feed_targets_name) when is_prediction, otherwise
        (pyreader, loss, probs, accuracy, num_seqs, bert.checkpoints).
    """
    # Declare the five feed slots; order defines the feed layout.
    field_names = ['src_ids', 'pos_ids', 'sent_ids', 'input_mask', 'labels']
    field_shapes = [[-1, args.max_seq_len, 1], [-1, args.max_seq_len, 1],
                    [-1, args.max_seq_len, 1], [-1, args.max_seq_len, 1],
                    [-1, 1]]
    field_dtypes = ['int64', 'int64', 'int64', 'float32', 'int64']

    inputs = [
        fluid.layers.data(name=f_name, shape=f_shape, dtype=f_dtype,
                          lod_level=0)
        for f_name, f_shape, f_dtype in zip(field_names, field_shapes,
                                            field_dtypes)
    ]
    src_ids, pos_ids, sent_ids, input_mask, labels = inputs

    pyreader = fluid.io.PyReader(feed_list=inputs, capacity=50, iterable=True)

    bert = BertModel(
        src_ids=src_ids,
        position_ids=pos_ids,
        sentence_ids=sent_ids,
        input_mask=input_mask,
        config=bert_config,
        use_fp16=False)

    # Pooled representation -> dropout -> classification FC.
    pooled = bert.get_pooled_output()
    pooled = fluid.layers.dropout(
        x=pooled,
        dropout_prob=0.1,
        dropout_implementation="upscale_in_train")
    logits = fluid.layers.fc(
        input=pooled,
        size=num_labels,
        param_attr=fluid.ParamAttr(
            name="cls_out_w",
            initializer=fluid.initializer.TruncatedNormal(scale=0.02)),
        bias_attr=fluid.ParamAttr(
            name="cls_out_b",
            initializer=fluid.initializer.Constant(0.)))

    if is_prediction:
        probs = fluid.layers.softmax(logits)
        feed_targets_name = [
            src_ids.name, pos_ids.name, sent_ids.name, input_mask.name
        ]
        return pyreader, probs, feed_targets_name

    ce_loss, probs = fluid.layers.softmax_with_cross_entropy(
        logits=logits, label=labels, return_softmax=True)
    loss = fluid.layers.mean(x=ce_loss)

    num_seqs = fluid.layers.create_tensor(dtype='int64')
    accuracy = fluid.layers.accuracy(input=probs, label=labels, total=num_seqs)

    return pyreader, loss, probs, accuracy, num_seqs, bert.checkpoints
def creat_model_for_cls_output(args, vocab_size, is_prediction=False):
    """Build a BERT graph that exposes the pooled [CLS] features directly.

    :param args: dict-like config; reads 'vocab_size' and 'max_seq_length'
        and is passed through to BertModel as its config.
    :param vocab_size: fallback vocabulary size; used only when
        args['vocab_size'] <= 0.
    :param is_prediction: when True, the label inputs are left out of the
        feed list and no labels are returned.
    :return: (reader, cls_feats, qas_ids) in prediction mode, otherwise
        (reader, cls_feats, qas_ids, labels).
    """
    # Resolve the vocabulary size: a positive value in the config overrides
    # the vocab_size argument.
    if args['vocab_size'] > 0:
        vocab_size = args['vocab_size']
    # Input definitions.
    qas_ids = fluid.data(name='qas_ids', dtype='int64', shape=[-1, 1])
    src_ids = fluid.data(name='src_ids', dtype='int64',
                         shape=[-1, args['max_seq_length'], 1])
    pos_ids = fluid.data(name='pos_ids', dtype='int64',
                         shape=[-1, args['max_seq_length'], 1])
    sent_ids = fluid.data(name='sent_ids', dtype='int64',
                          shape=[-1, args['max_seq_length'], 1])
    input_mask = fluid.data(name='input_mask', dtype='float32',
                            shape=[-1, args['max_seq_length'], 1])
    labels = fluid.data(name='labels', dtype='int64', shape=[-1, 1])
    labels_for_reverse = fluid.data(name='labels_for_reverse', dtype='int64',
                                    shape=[-1, 1])
    # Adjust the required feed data to the task: prediction has no label
    # inputs compared with training.
    if is_prediction:
        feed_list = [qas_ids, src_ids, pos_ids, sent_ids, input_mask]
    else:
        feed_list = [
            qas_ids, src_ids, pos_ids, sent_ids, input_mask, labels,
            labels_for_reverse
        ]
    reader = fluid.io.DataLoader.from_generator(feed_list=feed_list,
                                                capacity=64,
                                                iterable=True)
    # Model part: BERT whose pooled output is returned directly (no task
    # head here).
    # NOTE(review): `config` aliases the caller's args dict, so the line
    # below mutates the caller's config in place — confirm this write-back
    # is intended rather than using a copy.
    config = args
    config['vocab_size'] = vocab_size
    bert = BertModel(src_ids=src_ids,
                     position_ids=pos_ids,
                     sentence_ids=sent_ids,
                     input_mask=input_mask,
                     config=config,
                     use_fp16=False,
                     is_prediction=is_prediction)
    cls_feats = bert.get_pooled_output()
    if is_prediction:
        return reader, cls_feats, qas_ids
    return reader, cls_feats, qas_ids, labels
def create_model(args, vocab_size, is_prediction=False, is_validate=False):
    """Build the classification model.

    Called directly by the training and prediction modules; returns the
    computation results together with the matching DataLoader.

    :param args: dict-like config; reads 'vocab_size', 'max_seq_length',
        'use_engineer', 'mrc_layer', 'freeze_pretrained_model',
        'num_labels', 'lstm_hidden_size', 'use_fp16', 'loss_scaling'.
    :param vocab_size: vocabulary size for the embedding layer; ignored
        when the config supplies a positive 'vocab_size'.
    :param is_prediction: prediction mode — disables dropout etc.
    :param is_validate: validation mode — dropout disabled, but loss and
        accuracy are still returned (requires label data in the input,
        otherwise an error is raised).
    :return: prediction mode: (reader, probs, qas_ids); otherwise
        (reader, loss, probs, accuracy, qas_ids).
    """
    # A positive vocab size in the config overrides the argument.
    if args['vocab_size'] > 0:
        vocab_size = args['vocab_size']

    # Input definitions.
    qas_ids = fluid.data(name='qas_ids', dtype='int64', shape=[-1, 1])
    src_ids = fluid.data(name='src_ids', dtype='int64',
                         shape=[-1, args['max_seq_length'], 1])
    pos_ids = fluid.data(name='pos_ids', dtype='int64',
                         shape=[-1, args['max_seq_length'], 1])
    sent_ids = fluid.data(name='sent_ids', dtype='int64',
                          shape=[-1, args['max_seq_length'], 1])
    input_mask = fluid.data(name='input_mask', dtype='float32',
                            shape=[-1, args['max_seq_length'], 1])
    # Prediction mode feeds no labels (see feed_list below).
    labels = fluid.data(name='labels', dtype='int64', shape=[-1, 1])
    engineer_ids = fluid.data(name='engineer_ids', dtype='int64',
                              shape=[-1, args['max_seq_length'], 1])

    config = args
    if is_prediction:
        feed_list = [qas_ids, src_ids, pos_ids, sent_ids, input_mask]
    else:
        feed_list = [qas_ids, src_ids, pos_ids, sent_ids, input_mask, labels]
    if config['use_engineer']:
        feed_list.append(engineer_ids)
    reader = fluid.io.DataLoader.from_generator(feed_list=feed_list,
                                                capacity=64,
                                                iterable=True)

    # Model part: BERT followed by a task head selected by
    # config['mrc_layer'].
    # NOTE(review): `config` aliases the caller's args dict, so this writes
    # the resolved vocab size back into the caller's config.
    config['vocab_size'] = vocab_size
    bert = BertModel(src_ids=src_ids,
                     position_ids=pos_ids,
                     sentence_ids=sent_ids,
                     input_mask=input_mask,
                     config=config,
                     use_fp16=False,
                     is_prediction=(is_prediction or is_validate))

    mrc_layer = config['mrc_layer']
    freeze_pretrained_model = config['freeze_pretrained_model']
    cls_feats = bert.get_pooled_output()
    bert_encode = bert.get_sequence_output()
    if freeze_pretrained_model:
        # Keep the pretrained encoder fixed; only the head trains.
        cls_feats.stop_gradient = True
        bert_encode.stop_gradient = True
    if config['use_engineer']:
        # Concatenate learned embeddings of hand-engineered feature ids
        # onto every token representation.
        engineer_emb = fluid.layers.embedding(input=engineer_ids,
                                              size=[32, 8])
        bert_encode = fluid.layers.concat(input=[bert_encode, engineer_emb],
                                          axis=-1)

    logits = None
    if mrc_layer == "cls_fc":
        # Predict from the [CLS] vector through a single FC layer.
        cls_feats = fluid.layers.dropout(
            x=cls_feats,
            dropout_prob=0.1,
            is_test=(is_prediction or is_validate),
            dropout_implementation="upscale_in_train")
        logits = fluid.layers.fc(
            input=cls_feats,
            size=args['num_labels'],
            param_attr=fluid.ParamAttr(
                name="cls_out_w",
                initializer=fluid.initializer.TruncatedNormal(scale=0.02)),
            bias_attr=fluid.ParamAttr(
                name="cls_out_b",
                initializer=fluid.initializer.Constant(0.)))
    elif mrc_layer == "capsNet":
        # Feed the full BERT sequence output into a capsule network.
        bert_output = bert_encode
        param_attr = fluid.ParamAttr(
            name='conv2d.weight',
            initializer=fluid.initializer.Xavier(uniform=False),
            learning_rate=0.001)
        bert_output = fluid.layers.unsqueeze(input=bert_output, axes=[1])
        capsules = fluid.layers.conv2d(input=bert_output,
                                       num_filters=256,
                                       filter_size=32,
                                       stride=15,
                                       padding="VALID",
                                       act="relu",
                                       param_attr=param_attr)
        # (batch_size, 256, 33, 50)
        primaryCaps = CapsLayer(num_outputs=32, vec_len=8,
                                with_routing=False, layer_type='CONV')
        caps1 = primaryCaps(capsules, kernel_size=9, stride=2)
        # (batch_size, 8736, 8, 1)
        classifierCaps = CapsLayer(num_outputs=args['num_labels'],
                                   vec_len=16, with_routing=True,
                                   layer_type='FC')
        caps2 = classifierCaps(caps1)
        # (batch_size, 3, 16, 1)
        epsilon = 1e-9
        # Capsule length serves as the per-class score.
        v_length = fluid.layers.sqrt(
            fluid.layers.reduce_sum(
                fluid.layers.square(caps2), -2, keep_dim=True) + epsilon)
        logits = fluid.layers.squeeze(v_length, axes=[2, 3])
    elif mrc_layer == "lstm":
        hidden_size = args['lstm_hidden_size']
        cell = fluid.layers.LSTMCell(hidden_size=hidden_size)
        cell_r = fluid.layers.LSTMCell(hidden_size=hidden_size)
        # Skip position 0 ([CLS]); run a bidirectional LSTM and take the
        # last step of each direction.
        encoded = bert_encode[:, 1:, :]
        encoded = fluid.layers.dropout(
            x=encoded,
            is_test=(is_prediction or is_validate),
            dropout_prob=0.1,
            dropout_implementation="upscale_in_train")
        outputs = fluid.layers.rnn(cell, encoded)[0][:, -1, :]
        outputs_r = fluid.layers.rnn(cell_r, encoded,
                                     is_reverse=True)[0][:, -1, :]
        outputs = fluid.layers.concat(input=[outputs, outputs_r], axis=1)
        cls_feats = fluid.layers.dropout(
            x=outputs,
            is_test=(is_prediction or is_validate),
            dropout_prob=0.1,
            dropout_implementation="upscale_in_train")
        logits = fluid.layers.fc(
            input=cls_feats,
            size=args['num_labels'],
            param_attr=fluid.ParamAttr(
                name="lstm_fc_w",
                initializer=fluid.initializer.TruncatedNormal(scale=0.02)),
            bias_attr=fluid.ParamAttr(
                name="lstm_fc_b",
                initializer=fluid.initializer.Constant(0.)))
    elif mrc_layer == "highway_lstm":
        hidden_size = 128
        cell = fluid.layers.LSTMCell(hidden_size=hidden_size)
        cell_r = fluid.layers.LSTMCell(hidden_size=hidden_size)
        encoded = bert_encode[:, 1:, :]
        encoded = fluid.layers.dropout(
            x=encoded,
            is_test=(is_prediction or is_validate),
            dropout_prob=0.1,
            dropout_implementation="upscale_in_train")
        # Highway transform before the bidirectional LSTM.
        encoded = highway_layer(encoded, name="highway1", num_flatten_dims=2)
        encoded = fluid.layers.dropout(
            x=encoded,
            is_test=(is_prediction or is_validate),
            dropout_prob=0.1,
            dropout_implementation="upscale_in_train")
        outputs = fluid.layers.rnn(cell, encoded)[0][:, -1, :]
        outputs_r = fluid.layers.rnn(cell_r, encoded,
                                     is_reverse=True)[0][:, -1, :]
        outputs = fluid.layers.concat(input=[outputs, outputs_r], axis=1)
        cls_feats = fluid.layers.dropout(
            x=outputs,
            is_test=(is_prediction or is_validate),
            dropout_prob=0.1,
            dropout_implementation="upscale_in_train")
        logits = fluid.layers.fc(
            input=cls_feats,
            size=args['num_labels'],
            param_attr=fluid.ParamAttr(
                name="lstm_fc_w",
                initializer=fluid.initializer.TruncatedNormal(scale=0.02)),
            bias_attr=fluid.ParamAttr(
                name="lstm_fc_b",
                initializer=fluid.initializer.Constant(0.)))

    # Prediction returns only the dataloader and per-label probabilities.
    if is_prediction and not is_validate:
        probs = fluid.layers.softmax(logits)
        return reader, probs, qas_ids

    # Training/validation: class-weighted cross-entropy loss.
    ce_loss, probs = fluid.layers.softmax_with_cross_entropy(
        logits=logits, label=labels, return_softmax=True)
    # Per-class loss weights; hard-coded for num_labels == 3 with the last
    # class up-weighted 1.3x — TODO confirm against the label set.
    weight = fluid.layers.assign(
        np.array([[1.], [1.], [1.3]], dtype='float32'))

    def lossweighed(ce_loss, labels):
        # Scale each example's loss by the weight of its gold class.
        one_hot = fluid.one_hot(input=labels, depth=args["num_labels"])
        lw = fluid.layers.matmul(one_hot, weight)
        lw = fluid.layers.reduce_sum(lw, dim=1)
        loss = fluid.layers.elementwise_mul(lw, ce_loss)
        loss = fluid.layers.mean(loss)
        return loss

    loss = lossweighed(ce_loss, labels)
    # BUGFIX: args is a dict here (indexed as args['use_fp16'] everywhere
    # else), so the original attribute access args.loss_scaling would raise
    # AttributeError whenever use_fp16 was enabled.
    if args['use_fp16'] and args['loss_scaling'] > 1.0:
        loss *= args['loss_scaling']

    num_seqs = fluid.layers.create_tensor(dtype='int64')
    accuracy = fluid.layers.accuracy(input=probs, label=labels,
                                     total=num_seqs)

    # Return the dataloader, loss, predictions, and accuracy.
    return reader, loss, probs, accuracy, qas_ids
def create_model(args, pyreader_name, bert_config, num_labels, is_prediction=False):
    """Define the fine-tuning model.

    Builds a py_reader-fed BERT classifier whose feature extractor on top
    of BERT is chosen by args.sub_model_type ('raw', 'cnn', 'gru', 'ffa').

    NOTE(review): the whole body is guarded by `if args.binary:`; when
    args.binary is falsy the function implicitly returns None — confirm
    callers always pass binary configs (or that the non-binary branch
    lives elsewhere in the file).

    :param args: config namespace; reads binary, max_seq_len, use_fp16,
        sub_model_type, loss_scaling.
    :param pyreader_name: unique name for the py_reader op.
    :param bert_config: BertModel configuration object.
    :param num_labels: width of the classification output layer.
    :param is_prediction: when True, return probabilities and feed names.
    :return: (pyreader, probs, feed_targets_name) when is_prediction,
        otherwise (pyreader, loss, probs, accuracy, labels, num_seqs).
    """
    if args.binary:
        pyreader = fluid.layers.py_reader(
            capacity=50,
            shapes=[[-1, args.max_seq_len, 1], [-1, args.max_seq_len, 1],
                    [-1, args.max_seq_len, 1], [-1, args.max_seq_len, 1],
                    [-1, 1], [-1, 1]],
            dtypes=['int64', 'int64', 'int64', 'float32', 'int64', 'int64'],
            lod_levels=[0, 0, 0, 0, 0, 0],
            name=pyreader_name,
            use_double_buffer=True)
        (src_ids, pos_ids, sent_ids, input_mask, seq_len,
         labels) = fluid.layers.read_file(pyreader)
        bert = BertModel(
            src_ids=src_ids,
            position_ids=pos_ids,
            sentence_ids=sent_ids,
            input_mask=input_mask,
            config=bert_config,
            use_fp16=args.use_fp16)
        if args.sub_model_type == 'raw':
            # Use BERT's pooled [CLS] output directly.
            cls_feats = bert.get_pooled_output()
        elif args.sub_model_type == 'cnn':
            # TextCNN head: parallel sequence convs (widths 3/4/5) with
            # batch-norm + ReLU, max-pooled and concatenated.
            bert_seq_out = bert.get_sequence_output()
            # Strip padding so sequence ops see true lengths.
            bert_seq_out = fluid.layers.sequence_unpad(bert_seq_out, seq_len)
            cnn_hidden_size = 100
            convs = []
            for h in [3, 4, 5]:
                conv_feats = fluid.layers.sequence_conv(
                    input=bert_seq_out, num_filters=cnn_hidden_size,
                    filter_size=h)
                conv_feats = fluid.layers.batch_norm(input=conv_feats,
                                                     act="relu")
                conv_feats = fluid.layers.sequence_pool(
                    input=conv_feats, pool_type='max')
                convs.append(conv_feats)
            cls_feats = fluid.layers.concat(input=convs, axis=1)
        elif args.sub_model_type == 'gru':
            # Bidirectional GRU head, max-pooled over time.
            bert_seq_out = bert.get_sequence_output()
            bert_seq_out = fluid.layers.sequence_unpad(bert_seq_out, seq_len)
            gru_hidden_size = 1024
            # dynamic_gru expects an input 3x its hidden size.
            gru_input = fluid.layers.fc(input=bert_seq_out,
                                        size=gru_hidden_size * 3)
            gru_forward = fluid.layers.dynamic_gru(
                input=gru_input, size=gru_hidden_size, is_reverse=False)
            gru_backward = fluid.layers.dynamic_gru(
                input=gru_input, size=gru_hidden_size, is_reverse=True)
            gru_output = fluid.layers.concat([gru_forward, gru_backward],
                                             axis=1)
            cls_feats = fluid.layers.sequence_pool(
                input=gru_output, pool_type='max')
        elif args.sub_model_type == 'ffa':
            # Feed-forward attention: tanh-scored softmax weighting of the
            # token representations, summed over the sequence.
            bert_seq_out = bert.get_sequence_output()
            attn = fluid.layers.fc(input=bert_seq_out,
                                   num_flatten_dims=2,
                                   size=1,
                                   act='tanh')
            attn = fluid.layers.softmax(attn)
            weighted_input = bert_seq_out * attn
            weighted_input = fluid.layers.sequence_unpad(weighted_input,
                                                         seq_len)
            cls_feats = fluid.layers.sequence_pool(weighted_input,
                                                   pool_type='sum')
        else:
            raise NotImplementedError("%s is not implemented!" %
                                      args.sub_model_type)
        cls_feats = fluid.layers.dropout(
            x=cls_feats,
            dropout_prob=0.1,
            dropout_implementation="upscale_in_train")
        logits = fluid.layers.fc(
            input=cls_feats,
            size=num_labels,
            param_attr=fluid.ParamAttr(
                name="cls_out_w",
                initializer=fluid.initializer.TruncatedNormal(scale=0.02)),
            bias_attr=fluid.ParamAttr(
                name="cls_out_b",
                initializer=fluid.initializer.Constant(0.)))
        probs = fluid.layers.softmax(logits)
        if is_prediction:
            feed_targets_name = [
                src_ids.name, pos_ids.name, sent_ids.name, input_mask.name
            ]
            return pyreader, probs, feed_targets_name
        ce_loss = fluid.layers.softmax_with_cross_entropy(
            logits=logits, label=labels)
        loss = fluid.layers.mean(x=ce_loss)
        # Static loss scaling for fp16 training.
        if args.use_fp16 and args.loss_scaling > 1.0:
            loss *= args.loss_scaling
        num_seqs = fluid.layers.create_tensor(dtype='int64')
        accuracy = fluid.layers.accuracy(input=probs, label=labels,
                                         total=num_seqs)
        return (pyreader, loss, probs, accuracy, labels, num_seqs)
def create_model(args, bert_config, num_labels, is_prediction=False, k=0, n=0, q=0, task_name=""):
    """Build a BERT classifier for few-shot or fine-tuning tasks.

    Args:
        args: config namespace; reads use_fp16.
        bert_config: BertModel configuration object.
        num_labels: width of the classification FC layer.
        is_prediction: when True, return probabilities and feed names only.
        k, n, q: few-shot episode dimensions, used only when
            task_name == "fewshot".
        task_name: "fewshot", "fintune" (sic — must match callers), or
            anything else, which returns None.

    Returns:
        (data_loader, probs, feed_targets_name) in prediction mode,
        (data_loader, loss, probs, accuracy, num_seqs) for the two
        training tasks, or None for an unknown task_name.
    """
    input_fields = {
        'names': ['src_ids', 'pos_ids', 'sent_ids', 'input_mask', 'labels'],
        'shapes': [[None, None], [None, None], [None, None],
                   [None, None, 1], [None, 1]],
        'dtypes': ['int64', 'int64', 'int64', 'float32', 'int64'],
        'lod_levels': [0, 0, 0, 0, 0],
    }
    inputs = [
        fluid.data(name=input_fields['names'][i],
                   shape=input_fields['shapes'][i],
                   dtype=input_fields['dtypes'][i],
                   lod_level=input_fields['lod_levels'][i])
        for i in range(len(input_fields['names']))
    ]
    (src_ids, pos_ids, sent_ids, input_mask, labels) = inputs

    data_loader = fluid.io.DataLoader.from_generator(feed_list=inputs,
                                                     capacity=50,
                                                     iterable=True)

    bert = BertModel(src_ids=src_ids,
                     position_ids=pos_ids,
                     sentence_ids=sent_ids,
                     input_mask=input_mask,
                     config=bert_config,
                     use_fp16=args.use_fp16)

    cls_feats = bert.get_pooled_output()
    cls_feats = fluid.layers.dropout(
        x=cls_feats,
        dropout_prob=0.1,
        dropout_implementation="upscale_in_train")
    hidden = fluid.layers.fc(
        input=cls_feats,
        num_flatten_dims=2,
        size=num_labels,
        param_attr=fluid.ParamAttr(
            name="cls_out_w",
            initializer=fluid.initializer.TruncatedNormal(scale=0.02)),
        bias_attr=fluid.ParamAttr(
            name="cls_out_b",
            initializer=fluid.initializer.Constant(0.)))

    if is_prediction:
        # BUGFIX: the original applied softmax to `logits`, which is not
        # defined at this point (NameError); the FC output here is `hidden`.
        probs = fluid.layers.softmax(hidden)
        feed_targets_name = [
            src_ids.name, pos_ids.name, sent_ids.name, input_mask.name
        ]
        return data_loader, probs, feed_targets_name

    # (The original also computed fluid.layers.softmax(hidden) here and
    # immediately overwrote it in both task branches; removed as dead code.)
    if task_name == "fewshot":
        # Collapse the episode structure. Assumes num_labels == 2 and a
        # layout of (q*k query groups) x (k ways) x (n shots) — TODO
        # confirm against the data pipeline.
        logits = fluid.layers.reshape(hidden, [-1, num_labels], inplace=True)
        logits = fluid.layers.reshape(logits, [-1, q * k, k, n, 2],
                                      inplace=True)
        # Average over the n shots, then keep the positive-class score.
        logits = fluid.layers.reduce_mean(logits, dim=3, keep_dim=False)
        logits = logits[:, :, :, 1]
        logits = fluid.layers.reshape(logits, [-1, q * k, k], inplace=True)
        logits = fluid.layers.reshape(logits, [-1, k], inplace=True)
        ce_loss, probs = fluid.layers.softmax_with_cross_entropy(
            logits=logits, label=labels, return_softmax=True)
        loss = fluid.layers.mean(x=ce_loss)
        num_seqs = fluid.layers.create_tensor(dtype='int64')
        accuracy = fluid.layers.accuracy(input=probs, label=labels, k=1,
                                         total=num_seqs)
        return data_loader, loss, probs, accuracy, num_seqs
    elif task_name == "fintune":
        # Plain fine-tuning: flatten to (batch, num_labels) and train with
        # cross-entropy.
        logits = fluid.layers.reshape(hidden, [-1, num_labels], inplace=True)
        ce_loss, probs = fluid.layers.softmax_with_cross_entropy(
            logits=logits, label=labels, return_softmax=True)
        loss = fluid.layers.mean(x=ce_loss)
        num_seqs = fluid.layers.create_tensor(dtype='int64')
        accuracy = fluid.layers.accuracy(input=probs, label=labels, k=1,
                                         total=num_seqs)
        return data_loader, loss, probs, accuracy, num_seqs
    else:
        # Unknown task: nothing to build.
        return