Example #1
    def model_fn(features, labels, mode, params):
        logger.info("*** Features ***")
        if isinstance(features, dict):
            features = (features['words'], features['words_seq'],
                        features['text_length'])
        input_ids, input_word_ids, text_length_list = features
        is_training = (mode == tf.estimator.ModeKeys.TRAIN)
        tag_model = BILSTMONLY(params)

        output_spec = None
        if mode == tf.estimator.ModeKeys.TRAIN:
            loss, pred_ids, weight = tag_model(input_ids, input_word_ids,
                                               labels, text_length_list,
                                               is_training)
            train_op = optimization.create_optimizer(loss, args.lr,
                                                     params["decay_steps"],
                                                     args.clip_norm)
            hook_dict = {}
            hook_dict['loss'] = loss
            hook_dict['global_steps'] = tf.train.get_or_create_global_step()
            logging_hook = tf.train.LoggingTensorHook(
                hook_dict, every_n_iter=args.print_log_steps)

            output_spec = tf.estimator.EstimatorSpec(
                mode=mode,
                loss=loss,
                train_op=train_op,
                training_hooks=[logging_hook])

        elif mode == tf.estimator.ModeKeys.EVAL:
            loss, pred_ids, weight = tag_model(input_ids, input_word_ids,
                                               labels, text_length_list,
                                               is_training)
            f1_score_val, f1_update_op_val = f1(
                labels=labels,
                predictions=pred_ids,
                num_classes=params["num_labels"],
                weights=weight,
                average="micro")
            eval_metric_ops = {"f1": (f1_score_val, f1_update_op_val)}
            eval_hook_dict = {"f1": f1_score_val, "loss": loss}

            eval_logging_hook = tf.train.LoggingTensorHook(
                eval_hook_dict, at_end=True, every_n_iter=args.print_log_steps)
            output_spec = tf.estimator.EstimatorSpec(
                eval_metric_ops=eval_metric_ops,
                mode=mode,
                loss=loss,
                evaluation_hooks=[eval_logging_hook])
        else:
            pred_ids = tag_model(input_ids, input_word_ids, labels,
                                 text_length_list, is_training, True)
            output_spec = tf.estimator.EstimatorSpec(mode=mode,
                                                     predictions=pred_ids)
        return output_spec
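
A minimal sketch of how a model_fn like this is typically handed to tf.estimator; the model_dir, params values, and train_input_fn below are placeholders rather than names defined in the example.

    # Hypothetical wiring; the directory, params values and train_input_fn
    # are placeholders, not taken from the example above.
    run_config = tf.estimator.RunConfig(model_dir="output/bilstm",
                                        save_checkpoints_steps=1000)
    estimator = tf.estimator.Estimator(model_fn=model_fn,
                                       config=run_config,
                                       params={"num_labels": 10,
                                               "decay_steps": 10000})
    estimator.train(input_fn=train_input_fn, max_steps=10000)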
Example #2
    def model_fn(features, labels, mode, params):
        logger.info("*** Features ***")
        if isinstance(features, dict):
            features = (features['words'], features['text_length'],
                        features['query_length'], features['token_type_ids'])
        logger.info(features)
        input_ids, text_length_list, query_length_list, token_type_id_list = features
        if labels is not None:
            start_labels, end_labels = labels
        else:
            start_labels, end_labels = None, None
        is_training = (mode == tf.estimator.ModeKeys.TRAIN)
        is_testing = (mode == tf.estimator.ModeKeys.PREDICT)
        bert_config = modeling.BertConfig.from_json_file(bert_config_file)
        tag_model = bertMRC(params, bert_config)
        # tag_model call signature: input_ids, start_labels, end_labels,
        # token_type_id_list, query_length_list, text_length_list,
        # is_training, is_testing=False
        if is_testing:
            pred_start_ids, pred_end_ids, weight = tag_model(
                input_ids, start_labels, end_labels, token_type_id_list,
                query_length_list, text_length_list, is_training, is_testing)
        else:
            loss, pred_start_ids, pred_end_ids, weight = tag_model(
                input_ids, start_labels, end_labels, token_type_id_list,
                query_length_list, text_length_list, is_training)

        tvars = tf.trainable_variables()
        # Load the pretrained BERT weights
        if init_checkpoints:
            (assignment_map, initialized_variable_names) = \
                modeling.get_assignment_map_from_checkpoint(tvars,
                                                            init_checkpoints)
            tf.train.init_from_checkpoint(init_checkpoints, assignment_map)
        output_spec = None

        if mode == tf.estimator.ModeKeys.TRAIN:
            train_op = optimization.create_optimizer(loss,
                                                     args.lr,
                                                     params["decay_steps"],
                                                     args.clip_norm,
                                                     use_tpu=False)
            hook_dict = {}
            hook_dict['loss'] = loss
            hook_dict['global_steps'] = tf.train.get_or_create_global_step()
            logging_hook = tf.train.LoggingTensorHook(
                hook_dict, every_n_iter=args.print_log_steps)

            output_spec = tf.estimator.EstimatorSpec(
                mode=mode,
                loss=loss,
                train_op=train_op,
                training_hooks=[logging_hook])

        elif mode == tf.estimator.ModeKeys.EVAL:
            f1_start_val, f1_update_op_val = f1(labels=start_labels,
                                                predictions=pred_start_ids,
                                                num_classes=2,
                                                weights=weight,
                                                average="macro")
            f1_end_val, f1_end_update_op_val = f1(labels=end_labels,
                                                  predictions=pred_end_ids,
                                                  num_classes=2,
                                                  weights=weight,
                                                  average="macro")

            eval_metric_ops = {
                "f1_start_macro": (f1_start_val, f1_update_op_val),
                "f1_end_macro": (f1_end_val, f1_end_update_op_val)
            }

            output_spec = tf.estimator.EstimatorSpec(
                eval_metric_ops=eval_metric_ops, mode=mode, loss=loss)
        else:
            output_spec = tf.estimator.EstimatorSpec(
                mode=mode,
                predictions={
                    "start_ids": pred_start_ids,
                    "end_ids": pred_end_ids
                })
        return output_spec
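
For context, a hypothetical input_fn producing the feature dict this model_fn unpacks; the batch size, lengths, and constant tensors are illustrative stand-ins for a real tf.data pipeline.

    def input_fn():
        # Illustrative shapes: batch of 8, sequences of 128 tokens.
        features = {
            "words": tf.zeros([8, 128], dtype=tf.int32),           # token ids
            "text_length": tf.fill([8], 128),                      # full sequence lengths
            "query_length": tf.fill([8], 16),                      # query lengths
            "token_type_ids": tf.zeros([8, 128], dtype=tf.int32),  # segment ids
        }
        # Labels arrive as the (start_labels, end_labels) pair the model_fn expects.
        labels = (tf.zeros([8, 128], dtype=tf.int32),
                  tf.zeros([8, 128], dtype=tf.int32))
        return features, labels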
Example #3
    def model_fn(features, labels, mode, params):
        logger.info("*** Features ***")
        if isinstance(features, dict):
            features = (features['words'], features['token_type_ids'],
                        features['text_length'])
        input_ids, token_type_ids, text_length_list = features
        is_training = (mode == tf.estimator.ModeKeys.TRAIN)
        is_testing = (mode == tf.estimator.ModeKeys.PREDICT)
        bert_config = modeling.BertConfig.from_json_file(bert_config_file)
        tag_model = bertEventType(params, bert_config)
        if is_testing:
            pred_ids = tag_model(input_ids, labels, text_length_list,
                                 token_type_ids, is_training, is_testing)
        else:
            per_example_loss, loss, pred_ids = tag_model(
                input_ids, labels, text_length_list, token_type_ids,
                is_training)

        tvars = tf.trainable_variables()
        # Load the pretrained BERT weights
        if init_checkpoints:
            (assignment_map, initialized_variable_names) = \
                modeling.get_assignment_map_from_checkpoint(tvars,
                                                            init_checkpoints)
            tf.train.init_from_checkpoint(init_checkpoints, assignment_map)
        output_spec = None

        if mode == tf.estimator.ModeKeys.TRAIN:
            train_op = optimization.create_optimizer(loss, args.lr,
                                                     params["decay_steps"],
                                                     None, False)
            hook_dict = {}

            hook_dict['loss'] = loss
            hook_dict['global_steps'] = tf.train.get_or_create_global_step()
            logging_hook = tf.train.LoggingTensorHook(
                hook_dict, every_n_iter=args.print_log_steps)

            output_spec = tf.estimator.EstimatorSpec(
                mode=mode,
                loss=loss,
                train_op=train_op,
                training_hooks=[logging_hook])

        elif mode == tf.estimator.ModeKeys.EVAL:
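            # Binarize the outputs at 0.5 for the multi-label F1 below
            # (pred_ids is assumed to hold per-class probabilities here).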
            pred_ids = tf.where(pred_ids > 0.5, tf.ones_like(pred_ids),
                                tf.zeros_like(pred_ids))
            f1_score_val_micro, f1_update_op_val_micro = f1(
                labels=labels, predictions=pred_ids, num_classes=2)
            eval_metrics = {
                "f1_score_micro": (f1_score_val_micro, f1_update_op_val_micro)
            }
            eval_metrics['eval_loss'] = tf.metrics.mean(
                values=per_example_loss)
            output_spec = tf.estimator.EstimatorSpec(
                eval_metric_ops=eval_metrics, mode=mode, loss=loss)
        else:
            output_spec = tf.estimator.EstimatorSpec(mode=mode,
                                                     predictions=pred_ids)
        return output_spec
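
A sketch of driving the EVAL branch above; estimator and eval_input_fn are assumed to be built the same way as in the wiring sketch under Example #1.

    # evaluate() returns the metrics registered in eval_metric_ops,
    # plus 'loss' and 'global_step'.
    metrics = estimator.evaluate(input_fn=eval_input_fn)
    logger.info("f1_score_micro=%s eval_loss=%s",
                metrics["f1_score_micro"], metrics["eval_loss"])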
Example #4
    def model_fn(features, labels, mode, params):
        logger.info("*** Features ***")
        if isinstance(features, dict):
            features = (features['words'], features['text_length'],
                        features['query_length'], features['token_type_ids'])
        logger.info(features)
        input_ids, text_length_list, query_length_list, token_type_id_list = features
        if labels is not None:
            start_labels, end_labels, has_answer_label = labels
        else:
            start_labels, end_labels, has_answer_label = None, None, None

        is_training = (mode == tf.estimator.ModeKeys.TRAIN)
        is_testing = (mode == tf.estimator.ModeKeys.PREDICT)
        bert_config = modeling.BertConfig.from_json_file(bert_config_file)
        tag_model = VerfiyMRC(params, bert_config)

        # tag_model call signature: input_ids, start_labels, end_labels,
        # token_type_id_list, query_length_list, text_length_list,
        # has_answer_label, is_training, is_testing=False
        if is_testing:
            pred_start_ids, pred_end_ids, weight, predict_start_prob, predict_end_prob, has_answer_prob = tag_model(
                input_ids, start_labels, end_labels, token_type_id_list,
                query_length_list, text_length_list, has_answer_label,
                is_training, is_testing)
        else:
            loss, pred_start_ids, pred_end_ids, weight, predict_start_prob, predict_end_prob, has_answer_prob = tag_model(
                input_ids, start_labels, end_labels, token_type_id_list,
                query_length_list, text_length_list, has_answer_label,
                is_training)

        tvars = tf.trainable_variables()
        # Load the pretrained BERT weights
        if init_checkpoints:
            (assignment_map, initialized_variable_names) = \
                modeling.get_assignment_map_from_checkpoint(tvars,
                                                            init_checkpoints)
            tf.train.init_from_checkpoint(init_checkpoints, assignment_map)
        output_spec = None

        if mode == tf.estimator.ModeKeys.TRAIN:
            train_op = optimization.create_optimizer(
                loss, args.lr, params["train_steps"],
                params["num_warmup_steps"], args.clip_norm)
            hook_dict = {}
            hook_dict['loss'] = loss
            hook_dict['global_steps'] = tf.train.get_or_create_global_step()
            logging_hook = tf.train.LoggingTensorHook(
                hook_dict, every_n_iter=args.print_log_steps)

            output_spec = tf.estimator.EstimatorSpec(
                mode=mode,
                loss=loss,
                train_op=train_op,
                training_hooks=[logging_hook])

        elif mode == tf.estimator.ModeKeys.EVAL:
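            # Threshold the answerability probability at 0.5 to get a hard 0/1 prediction.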
            has_answer_pred = tf.where(has_answer_prob > 0.5,
                                       tf.ones_like(has_answer_prob),
                                       tf.zeros_like(has_answer_prob))

            f1_start_val, f1_update_op_val = f1(labels=start_labels,
                                                predictions=pred_start_ids,
                                                num_classes=2,
                                                weights=weight,
                                                average="macro")
            f1_end_val, f1_end_update_op_val = f1(labels=end_labels,
                                                  predictions=pred_end_ids,
                                                  num_classes=2,
                                                  weights=weight,
                                                  average="macro")
            has_answer_label = tf.cast(has_answer_label, tf.float32)
            f1_has_val, f1_has_update_op_val = f1(labels=has_answer_label,
                                                  predictions=has_answer_pred,
                                                  num_classes=2)

            eval_metric_ops = {
                "f1_start_macro": (f1_start_val, f1_update_op_val),
                "f1_end_macro": (f1_end_val, f1_end_update_op_val),
                "f1_has_answer_macro": (f1_has_val, f1_has_update_op_val),
                "eval_loss": tf.metrics.mean(values=loss)
            }

            output_spec = tf.estimator.EstimatorSpec(
                eval_metric_ops=eval_metric_ops, mode=mode, loss=loss)
        else:
            output_spec = tf.estimator.EstimatorSpec(
                mode=mode,
                predictions={
                    "start_ids": pred_start_ids,
                    "end_ids": pred_end_ids,
                    "start_probs": predict_start_prob,
                    "end_probs": predict_end_prob,
                    "has_answer_probs": has_answer_prob
                })
        return output_spec
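
One possible post-processing step for the PREDICT output above; decode_spans is a hypothetical helper that pairs each predicted start position with the nearest end position at or after it.

    def decode_spans(start_ids, end_ids):
        # start_ids and end_ids are per-token 0/1 sequences for one example.
        spans = []
        for s, is_start in enumerate(start_ids):
            if is_start != 1:
                continue
            for e in range(s, len(end_ids)):
                if end_ids[e] == 1:
                    spans.append((s, e))
                    break
        return spans

    # decode_spans([0, 1, 0, 0], [0, 0, 0, 1]) -> [(1, 3)]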