import os
import time
from collections import OrderedDict

import numpy as np
import paddle.fluid as fluid
import paddlehub as hub
from paddle.fluid.dygraph import to_variable
from paddle.fluid.optimizer import AdamOptimizer


# Metrics aggregation for a PaddleHub sequence labeling task;
# self.add_crf toggles the CRF decoding head.
def _calculate_metrics(self, run_states):
    total_infer = total_label = total_correct = loss_sum = 0
    run_step = run_time_used = run_examples = 0
    precision_sum = recall_sum = f1_score_sum = 0
    for run_state in run_states:
        loss_sum += np.mean(run_state.run_results[-1])
        if self.add_crf:
            # With a CRF head the per-batch metrics are already computed;
            # accumulate them weighted by the number of examples.
            precision_sum += np.mean(
                run_state.run_results[0]) * run_state.run_examples
            recall_sum += np.mean(
                run_state.run_results[1]) * run_state.run_examples
            f1_score_sum += np.mean(
                run_state.run_results[2]) * run_state.run_examples
        else:
            # Without CRF, accumulate chunk counts and compute F1 once at the end.
            np_labels = run_state.run_results[0]
            np_infers = run_state.run_results[1]
            np_lens = run_state.run_results[2]
            label_num, infer_num, correct_num = chunk_eval(
                np_labels, np_infers, np_lens, self.num_labels,
                self.device_count)
            total_infer += infer_num
            total_label += label_num
            total_correct += correct_num
        run_examples += run_state.run_examples
        run_step += run_state.run_step

    run_time_used = time.time() - run_states[0].run_time_begin
    run_speed = run_step / run_time_used
    avg_loss = loss_sum / run_examples
    if self.add_crf:
        precision = precision_sum / run_examples
        recall = recall_sum / run_examples
        f1 = f1_score_sum / run_examples
    else:
        precision, recall, f1 = calculate_f1(total_label, total_infer,
                                             total_correct)
    # The first key will be used as the main metric to select the best model
    scores = OrderedDict()
    for metric in self.metrics_choices:
        if metric == "precision":
            scores["precision"] = precision
        elif metric == "recall":
            scores["recall"] = recall
        elif metric == "f1":
            scores["f1"] = f1
        else:
            raise ValueError("Unsupported metric: \"%s\"" % metric)
    return scores, avg_loss, run_speed
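
# ---------------------------------------------------------------------------
# The metrics aggregation above (and the variant that follows) delegates
# chunk counting to a chunk_eval helper. The sketch below only illustrates
# the counting logic, it is not the project's implementation: the BIO layout
# it assumes (even ids = B- tags, odd ids = I- tags, last id = "O") and the
# "one padded row per sequence" shape are assumptions, and _extract_chunks
# is a hypothetical helper introduced here.
def _extract_chunks(tags, length, num_labels):
    """Collect (start, end, type) chunks from one BIO-tagged sequence."""
    chunks, start, chunk_type = [], -1, -1
    for i in range(int(length)):
        tag = int(tags[i])
        is_begin = tag < num_labels - 1 and tag % 2 == 0   # assumed B- ids
        is_inside = tag < num_labels - 1 and tag % 2 == 1  # assumed I- ids
        if is_begin:
            if start >= 0:
                chunks.append((start, i, chunk_type))
            start, chunk_type = i, tag // 2
        elif is_inside and start >= 0 and tag // 2 == chunk_type:
            continue  # current chunk keeps growing
        else:
            if start >= 0:  # "O" or an inconsistent I- tag closes the chunk
                chunks.append((start, i, chunk_type))
            start, chunk_type = -1, -1
    if start >= 0:
        chunks.append((start, int(length), chunk_type))
    return chunks


def chunk_eval(np_labels, np_infers, np_lens, num_labels, device_count=1):
    """Count gold chunks, predicted chunks, and exact (span, type) matches.

    device_count is accepted for signature parity; this sketch ignores it.
    """
    lens = np_lens.reshape(-1)
    labels = np_labels.reshape(len(lens), -1)  # assumed: one padded row per sequence
    infers = np_infers.reshape(len(lens), -1)
    label_num = infer_num = correct_num = 0
    for gold_row, pred_row, length in zip(labels, infers, lens):
        gold = _extract_chunks(gold_row, length, num_labels)
        pred = _extract_chunks(pred_row, length, num_labels)
        label_num += len(gold)
        infer_num += len(pred)
        correct_num += len(set(gold) & set(pred))
    return label_num, infer_num, correct_num
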
def _calculate_metrics(self, run_states):
    total_infer = total_label = total_correct = loss_sum = 0
    run_step = run_time_used = run_examples = 0
    for run_state in run_states:
        loss_sum += np.mean(run_state.run_results[-1])
        np_labels = run_state.run_results[0]
        np_infers = run_state.run_results[1]
        np_lens = run_state.run_results[2]
        label_num, infer_num, correct_num = chunk_eval(
            np_labels, np_infers, np_lens, self.num_labels,
            self.device_count)
        total_infer += infer_num
        total_label += label_num
        total_correct += correct_num
        run_examples += run_state.run_examples
        run_step += run_state.run_step

    run_time_used = time.time() - run_states[0].run_time_begin
    run_speed = run_step / run_time_used
    avg_loss = loss_sum / run_examples
    precision, recall, f1 = calculate_f1(total_label, total_infer,
                                         total_correct)
    return precision, recall, f1, avg_loss, run_speed
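
# calculate_f1, used by both metric paths above, is the standard chunk-level
# precision/recall/F1 computation. A minimal sketch consistent with how it is
# called (the zero-division guards are an assumption):
def calculate_f1(total_label, total_infer, total_correct):
    precision = total_correct / total_infer if total_infer else 0.0
    recall = total_correct / total_label if total_label else 0.0
    f1 = (2 * precision * recall / (precision + recall)
          if precision + recall else 0.0)
    return precision, recall, f1
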
def finetune(args):
    module = hub.Module(name="ernie", max_seq_len=args.max_seq_len)
    # Use the matching tokenizer to preprocess the dataset
    tokenizer = hub.BertTokenizer(vocab_file=module.get_vocab_path())
    dataset = hub.dataset.MSRA_NER(
        tokenizer=tokenizer, max_seq_len=args.max_seq_len)
    with fluid.dygraph.guard():
        ts = TransformerSeqLabeling(
            num_classes=dataset.num_labels, transformer=module)
        adam = AdamOptimizer(
            learning_rate=1e-5, parameter_list=ts.parameters())
        state_dict_path = os.path.join(args.checkpoint_dir,
                                       'dygraph_state_dict')
        # Resume from a previous checkpoint if one exists
        if os.path.exists(state_dict_path + '.pdparams'):
            state_dict, _ = fluid.load_dygraph(state_dict_path)
            ts.load_dict(state_dict)

        loss_sum = total_infer = total_label = total_correct = cnt = 0
        for epoch in range(args.num_epoch):
            for batch_id, data in enumerate(
                    dataset.batch_records_generator(
                        phase="train",
                        batch_size=args.batch_size,
                        shuffle=True,
                        pad_to_batch_max_seq_len=False)):
                batch_size = len(data["input_ids"])
                input_ids = np.array(data["input_ids"]).astype(
                    np.int64).reshape([batch_size, -1, 1])
                position_ids = np.array(data["position_ids"]).astype(
                    np.int64).reshape([batch_size, -1, 1])
                segment_ids = np.array(data["segment_ids"]).astype(
                    np.int64).reshape([batch_size, -1, 1])
                input_mask = np.array(data["input_mask"]).astype(
                    np.float32).reshape([batch_size, -1, 1])
                labels = np.array(data["label"]).astype(np.int64).reshape(
                    -1, 1)
                seq_len = np.array(data["seq_len"]).astype(np.int64).reshape(
                    -1, 1)

                pred, ret_infers = ts(input_ids, position_ids, segment_ids,
                                      input_mask)
                loss = fluid.layers.cross_entropy(pred, to_variable(labels))
                avg_loss = fluid.layers.mean(loss)
                avg_loss.backward()
                adam.minimize(avg_loss)
                # Gradients accumulate across steps in dygraph unless cleared
                ts.clear_gradients()

                # Accumulate loss and chunk counts for periodic logging
                loss_sum += avg_loss.numpy() * labels.shape[0]
                label_num, infer_num, correct_num = chunk_eval(
                    labels, ret_infers.numpy(), seq_len, dataset.num_labels, 1)
                cnt += labels.shape[0]
                total_infer += infer_num
                total_label += label_num
                total_correct += correct_num

                if batch_id % args.log_interval == 0:
                    precision, recall, f1 = calculate_f1(
                        total_label, total_infer, total_correct)
                    print('epoch {}: loss {}, f1 {} recall {} precision {}'.
                          format(epoch, loss_sum / cnt, f1, recall, precision))
                    loss_sum = total_infer = total_label = total_correct = cnt = 0

                if batch_id % args.save_interval == 0:
                    state_dict = ts.state_dict()
                    fluid.save_dygraph(state_dict, state_dict_path)
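
# ---------------------------------------------------------------------------
# Both finetune() variants assume a dygraph layer wrapping the ERNIE module
# with a per-token classifier that returns (probabilities, argmax ids). Below
# is a minimal sketch under those assumptions; the 768 hidden size and the
# 'sequence_output' result key are assumptions about the ernie module, and
# the real TransformerSeqLabeling (or TransformerSequenceLabelLayer in the
# second variant) may differ.
from paddle.fluid.dygraph import Layer, Linear


class TransformerSeqLabeling(Layer):
    def __init__(self, num_classes, transformer):
        super(TransformerSeqLabeling, self).__init__()
        self.num_classes = num_classes
        self.transformer = transformer
        self.fc = Linear(input_dim=768, output_dim=num_classes)  # assumed hidden size

    def forward(self, input_ids, position_ids, segment_ids, input_mask):
        result = self.transformer(input_ids, position_ids, segment_ids,
                                  input_mask)
        token_feats = result['sequence_output']  # [batch, seq, hidden], assumed key
        logits = self.fc(token_feats)            # [batch, seq, num_classes]
        ret_infers = fluid.layers.reshape(
            fluid.layers.argmax(logits, axis=2), shape=[-1, 1])
        logits = fluid.layers.reshape(logits, shape=[-1, self.num_classes])
        # The callers feed the first output to fluid.layers.cross_entropy,
        # which expects normalized probabilities.
        return fluid.layers.softmax(logits), ret_infers
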
def finetune(args):
    ernie = hub.Module(name="ernie", max_seq_len=args.max_seq_len)
    with fluid.dygraph.guard():
        dataset = hub.dataset.MSRA_NER()
        ts = TransformerSequenceLabelLayer(
            num_classes=dataset.num_labels, transformer=ernie)
        adam = AdamOptimizer(
            learning_rate=1e-5, parameter_list=ts.parameters())
        state_dict_path = os.path.join(args.checkpoint_dir,
                                       'dygraph_state_dict')
        # Resume from a previous checkpoint if one exists
        if os.path.exists(state_dict_path + '.pdparams'):
            state_dict, _ = fluid.load_dygraph(state_dict_path)
            ts.load_dict(state_dict)

        reader = hub.reader.SequenceLabelReader(
            dataset=dataset,
            vocab_path=ernie.get_vocab_path(),
            max_seq_len=args.max_seq_len,
            sp_model_path=ernie.get_spm_path(),
            word_dict_path=ernie.get_word_dict_path())
        train_reader = reader.data_generator(
            batch_size=args.batch_size, phase='train')

        loss_sum = total_infer = total_label = total_correct = cnt = 0
        # Train for num_epoch epochs
        for epoch in range(args.num_epoch):
            # Iterate over the training data
            for batch_id, data in enumerate(train_reader()):
                input_ids = np.array(data[0][0]).astype(np.int64)
                position_ids = np.array(data[0][1]).astype(np.int64)
                segment_ids = np.array(data[0][2]).astype(np.int64)
                input_mask = np.array(data[0][3]).astype(np.float32)
                labels = np.array(data[0][4]).astype(np.int64).reshape(-1, 1)
                seq_len = np.squeeze(
                    np.array(data[0][5]).astype(np.int64), axis=1)

                pred, ret_infers = ts(input_ids, position_ids, segment_ids,
                                      input_mask)
                loss = fluid.layers.cross_entropy(pred, to_variable(labels))
                avg_loss = fluid.layers.mean(loss)
                avg_loss.backward()
                # Update the parameters
                adam.minimize(avg_loss)
                # Gradients accumulate across steps in dygraph unless cleared
                ts.clear_gradients()

                # Accumulate loss and chunk counts for periodic logging
                loss_sum += avg_loss.numpy() * labels.shape[0]
                label_num, infer_num, correct_num = chunk_eval(
                    labels, ret_infers.numpy(), seq_len, dataset.num_labels, 1)
                cnt += labels.shape[0]
                total_infer += infer_num
                total_label += label_num
                total_correct += correct_num

                if batch_id % args.log_interval == 0:
                    precision, recall, f1 = calculate_f1(
                        total_label, total_infer, total_correct)
                    print('epoch {}: loss {}, f1 {} recall {} precision {}'.
                          format(epoch, loss_sum / cnt, f1, recall, precision))
                    loss_sum = total_infer = total_label = total_correct = cnt = 0

                if batch_id % args.save_interval == 0:
                    state_dict = ts.state_dict()
                    fluid.save_dygraph(state_dict, state_dict_path)
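
# ---------------------------------------------------------------------------
# A hedged sketch of a command-line entry point for finetune(). The flag set
# is inferred from the attributes the function reads (max_seq_len,
# checkpoint_dir, num_epoch, batch_size, log_interval, save_interval);
# the defaults are illustrative only, not the project's.
import argparse

if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Fine-tune ERNIE for sequence labeling on MSRA_NER")
    parser.add_argument("--max_seq_len", type=int, default=128)
    parser.add_argument("--checkpoint_dir", type=str,
                        default="ckpt_sequence_label")
    parser.add_argument("--num_epoch", type=int, default=3)
    parser.add_argument("--batch_size", type=int, default=32)
    parser.add_argument("--log_interval", type=int, default=10)
    parser.add_argument("--save_interval", type=int, default=100)
    finetune(parser.parse_args())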