def create_model(args, pyreader_name, ernie_config, batch_size=16, is_prediction=False, task_name=""):
    """Build a dual-encoder retrieval graph trained with in-batch negatives.

    A query ERNIE tower and a title/para ERNIE tower each produce a [CLS]
    vector.  Every query in the batch is scored against every paragraph via
    a dot-product matrix, and a soft-label cross entropy is taken against
    the identity matrix: paragraph i is the positive for query i, all other
    in-batch paragraphs act as negatives.

    Args:
        args: config namespace; reads q_max_seq_len and p_max_seq_len.
        pyreader_name: name for the fluid py_reader.
        ernie_config: ERNIE model configuration.
        batch_size: static per-batch size baked into the reader shapes.
        is_prediction: unused here; kept for signature parity with the
            hard-negative variant of this function.
        task_name: unused here; kept for signature parity.

    Returns:
        (pyreader, graph_vars) where graph_vars exposes loss / probs /
        accuracy / labels / num_seqs / qids plus both [CLS] encodings
        ("q_rep", "p_rep").
    """
    q_shape = [batch_size, args.q_max_seq_len, 1]
    p_shape = [batch_size, args.p_max_seq_len, 1]
    scalar_shape = [batch_size, 1]
    # 5 query fields, 5 paragraph fields, then labels and qids.
    pyreader = fluid.layers.py_reader(
        capacity=50,
        shapes=[q_shape] * 5 + [p_shape] * 5 + [scalar_shape, scalar_shape],
        dtypes=['int64', 'int64', 'int64', 'int64', 'float32',
                'int64', 'int64', 'int64', 'int64', 'float32',
                'int64', 'int64'],
        lod_levels=[0] * 12,
        name=pyreader_name,
        use_double_buffer=True)

    (src_ids_q, sent_ids_q, pos_ids_q, task_ids_q, input_mask_q,
     src_ids_p, sent_ids_p, pos_ids_p, task_ids_p, input_mask_p,
     labels, qids) = fluid.layers.read_file(pyreader)

    # Separate towers; model_name prefixes keep their parameters distinct.
    query_tower = ErnieModel(
        src_ids=src_ids_q,
        position_ids=pos_ids_q,
        sentence_ids=sent_ids_q,
        task_ids=task_ids_q,
        input_mask=input_mask_q,
        config=ernie_config,
        model_name='query_')
    para_tower = ErnieModel(
        src_ids=src_ids_p,
        position_ids=pos_ids_p,
        sentence_ids=sent_ids_p,
        task_ids=task_ids_p,
        input_mask=input_mask_p,
        config=ernie_config,
        model_name='titlepara_')

    q_cls_feats = query_tower.get_cls_output()
    p_cls_feats = para_tower.get_cls_output()

    # Pairwise similarities: logits[i][j] = <query_i, para_j>.
    logits = fluid.layers.matmul(
        q_cls_feats, p_cls_feats, transpose_x=False, transpose_y=True)
    probs = logits

    # Soft target is the identity matrix (diagonal = positives).
    matrix_labels = fluid.layers.eye(batch_size, batch_size, dtype='float32')
    matrix_labels.stop_gradient = True

    ce_loss, _ = fluid.layers.softmax_with_cross_entropy(
        logits=logits, label=matrix_labels, soft_label=True,
        return_softmax=True)
    loss = fluid.layers.mean(x=ce_loss)

    # Collapse the soft target to hard diagonal indices for the metric.
    matrix_labels = fluid.layers.argmax(matrix_labels, axis=-1)
    matrix_labels = fluid.layers.reshape(
        x=matrix_labels, shape=[batch_size, 1])

    num_seqs = fluid.layers.create_tensor(dtype='int64')
    accuracy = fluid.layers.accuracy(
        input=probs, label=matrix_labels, total=num_seqs)

    graph_vars = {
        "loss": loss,
        "probs": probs,
        "accuracy": accuracy,
        "labels": labels,
        "num_seqs": num_seqs,
        "qids": qids,
        "q_rep": q_cls_feats,
        "p_rep": p_cls_feats
    }

    return pyreader, graph_vars
def create_model(args, pyreader_name, ernie_config, batch_size=16, is_prediction=False, task_name="", fleet_handle=None):
    """Build a dual-encoder graph trained with in-batch plus mined hard negatives.

    Each sample carries a query, a positive title/para and a hard-negative
    title/para.  Both paragraph towers share parameters (same
    model_name='titlepara_').  Positive and negative encodings are
    concatenated so every query is scored against 2 * batch_size paragraphs;
    when args.use_cross_batch is set and a fleet_handle is given, paragraph
    encodings are all-gathered across workers first, widening the negative
    pool to 2 * batch_size * worker_num.

    Args:
        args: config namespace; reads q_max_seq_len, p_max_seq_len and
            use_cross_batch.
        pyreader_name: base reader name, prefixed with task_name.
        ernie_config: ERNIE model configuration.
        batch_size: static per-worker batch size baked into reader shapes.
        is_prediction: if True, only query-vs-positive dot products are
            built (no loss/accuracy) and the function returns early.
        task_name: prefix so multiple tasks can own distinct readers.
        fleet_handle: distributed fleet handle; required for cross-batch
            negative sharing.

    Returns:
        (pyreader, graph_vars) when is_prediction is True, otherwise
        (pyreader, graph_vars, cp) where cp is the list of recompute
        checkpoints from all three towers.
        NOTE(review): the tuple arity differs between the two paths —
        callers must unpack accordingly.
    """
    print("DEBUG:\tclassify")
    q_shape = [batch_size, args.q_max_seq_len, 1]
    p_shape = [batch_size, args.p_max_seq_len, 1]
    scalar_shape = [batch_size, 1]
    # 5 query fields, 5 positive-para fields, 5 negative-para fields,
    # then labels and qids: 17 slots in total.
    pyreader = fluid.layers.py_reader(
        capacity=50,
        shapes=[q_shape] * 5 + [p_shape] * 10 + [scalar_shape, scalar_shape],
        dtypes=['int64', 'int64', 'int64', 'int64', 'float32',
                'int64', 'int64', 'int64', 'int64', 'float32',
                'int64', 'int64', 'int64', 'int64', 'float32',
                'int64', 'int64'],
        lod_levels=[0] * 17,
        name=task_name + "_" + pyreader_name,
        use_double_buffer=True)

    (src_ids_q, sent_ids_q, pos_ids_q, task_ids_q, input_mask_q,
     src_ids_p_pos, sent_ids_p_pos, pos_ids_p_pos, task_ids_p_pos, input_mask_p_pos,
     src_ids_p_neg, sent_ids_p_neg, pos_ids_p_neg, task_ids_p_neg, input_mask_p_neg,
     labels, qids) = fluid.layers.read_file(pyreader)

    ernie_q = ErnieModel(
        src_ids=src_ids_q,
        position_ids=pos_ids_q,
        sentence_ids=sent_ids_q,
        task_ids=task_ids_q,
        input_mask=input_mask_q,
        config=ernie_config,
        model_name='query_')
    # Positive paragraph tower.
    ernie_pos = ErnieModel(
        src_ids=src_ids_p_pos,
        position_ids=pos_ids_p_pos,
        sentence_ids=sent_ids_p_pos,
        task_ids=task_ids_p_pos,
        input_mask=input_mask_p_pos,
        config=ernie_config,
        model_name='titlepara_')
    # Negative paragraph tower — same model_name, so weights are shared
    # with the positive tower.
    ernie_neg = ErnieModel(
        src_ids=src_ids_p_neg,
        position_ids=pos_ids_p_neg,
        sentence_ids=sent_ids_p_neg,
        task_ids=task_ids_p_neg,
        input_mask=input_mask_p_neg,
        config=ernie_config,
        model_name='titlepara_')

    q_cls_feats = ernie_q.get_cls_output()
    pos_cls_feats = ernie_pos.get_cls_output()
    neg_cls_feats = ernie_neg.get_cls_output()

    # [2 * batch_size, hidden]: positives first, then negatives.
    p_cls_feats = fluid.layers.concat([pos_cls_feats, neg_cls_feats], axis=0)

    if is_prediction:
        # Inference only needs query-vs-positive scores; drop the negatives.
        p_cls_feats = fluid.layers.slice(
            p_cls_feats, axes=[0], starts=[0], ends=[batch_size])
        multi = fluid.layers.elementwise_mul(q_cls_feats, p_cls_feats)
        probs = fluid.layers.reduce_sum(multi, dim=-1)
        graph_vars = {
            "probs": probs,
            "qids": qids,
            "q_rep": q_cls_feats,
            "p_rep": p_cls_feats
        }
        return pyreader, graph_vars

    if args.use_cross_batch and fleet_handle is not None:
        # Share paragraph encodings across workers to enlarge the pool
        # of in-batch negatives.
        print("worker num is: {}".format(fleet_handle.worker_num()))
        all_p_cls_feats = fluid.layers.collective._c_allgather(
            p_cls_feats, fleet_handle.worker_num(), use_calc_stream=True)
        logits = fluid.layers.matmul(
            q_cls_feats, all_p_cls_feats, transpose_x=False, transpose_y=True)
        worker_id = fleet_handle.worker_index()
    else:
        logits = fluid.layers.matmul(
            q_cls_feats, p_cls_feats, transpose_x=False, transpose_y=True)
        worker_id = 0
    probs = logits

    # Each worker contributes 2 * batch_size gathered rows (pos then neg),
    # so this worker's positives live at rows
    # [2 * batch_size * worker_id, 2 * batch_size * worker_id + batch_size).
    all_labels = np.array(
        range(batch_size * worker_id * 2, batch_size * (worker_id * 2 + 1)),
        dtype='int64')
    matrix_labels = fluid.layers.assign(all_labels)
    matrix_labels = fluid.layers.unsqueeze(matrix_labels, axes=1)
    matrix_labels.stop_gradient = True

    ce_loss = fluid.layers.softmax_with_cross_entropy(
        logits=logits, label=matrix_labels)
    loss = fluid.layers.mean(x=ce_loss)

    num_seqs = fluid.layers.create_tensor(dtype='int64')
    # FIX: pass total=num_seqs (as the sibling in-batch-negative
    # create_model does); previously num_seqs was exported in graph_vars
    # but never written to.
    accuracy = fluid.layers.accuracy(
        input=probs, label=matrix_labels, total=num_seqs)

    graph_vars = {
        "loss": loss,
        "probs": probs,
        "accuracy": accuracy,
        "labels": labels,
        "num_seqs": num_seqs,
        "qids": qids,
        "q_rep": q_cls_feats,
        "p_rep": p_cls_feats
    }

    # Recompute checkpoints from all three towers for memory-saving training.
    cp = []
    cp.extend(ernie_q.checkpoints)
    cp.extend(ernie_pos.checkpoints)
    cp.extend(ernie_neg.checkpoints)
    return pyreader, graph_vars, cp