def create_model(bert_config, is_training, input_ids, input_mask, segment_ids, labels, num_labels):
    """

    :param bert_config:
    :param is_training:
    :param input_ids:
    :param input_mask:
    :param segment_ids:
    :param labels:
    :param num_labels:
    :param use_one_hot_embedding:
    :return:
    """
    # Run the input batch through BERT to obtain its representation
    model = modeling.BertModel(
        config=bert_config,
        is_training=is_training,
        input_ids=input_ids,
        input_mask=input_mask,
        token_type_ids=segment_ids,
    )

    embedding_layer = model.get_sequence_output()
    output_layer = model.get_pooled_output()
    hidden_size = output_layer.shape[-1].value

    # model = CNN_Classification(embedding_chars=embedding_layer,
    #                                labels=labels,
    #                                num_tags=num_labels,
    #                                sequence_length=FLAGS.max_seq_length,
    #                                embedding_dims=embedding_layer.shape[-1].value,
    #                                vocab_size=0,
    #                                filter_sizes=[3, 4, 5],
    #                                num_filters=3,
    #                                dropout_keep_prob=FLAGS.dropout_keep_prob,
    #                                l2_reg_lambda=0.001)
    # loss, predictions, probabilities = model.add_cnn_layer()

    output_weights = tf.get_variable(
        "output_weights", [num_labels, hidden_size],
        initializer=tf.truncated_normal_initializer(stddev=0.02))

    output_bias = tf.get_variable(
        "output_bias", [num_labels], initializer=tf.zeros_initializer())

    with tf.variable_scope("loss"):
        if is_training:
            # I.e., 0.1 dropout
            output_layer = tf.nn.dropout(output_layer, keep_prob=0.9)

        logits = tf.matmul(output_layer, output_weights, transpose_b=True)
        logits = tf.nn.bias_add(logits, output_bias)
        probabilities = tf.nn.softmax(logits, axis=-1)
        log_probs = tf.nn.log_softmax(logits, axis=-1)

        one_hot_labels = tf.one_hot(labels, depth=num_labels, dtype=tf.float32)

        per_example_loss = -tf.reduce_sum(one_hot_labels * log_probs, axis=-1)
        loss = tf.reduce_mean(per_example_loss)
    return (loss, per_example_loss, logits, probabilities)
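The manual one-hot * log_softmax loss above is equivalent to TensorFlow's built-in softmax cross entropy. A minimal sanity check (a sketch assuming a TF1 environment; the toy logits and labels are made up):

import numpy as np
import tensorflow as tf

toy_logits = tf.constant([[2.0, 0.5, -1.0], [0.1, 0.2, 0.3]])   # [batch=2, num_labels=3]
toy_labels = tf.constant([0, 2])
toy_one_hot = tf.one_hot(toy_labels, depth=3, dtype=tf.float32)

# Manual loss, exactly as in create_model above
manual = -tf.reduce_sum(toy_one_hot * tf.nn.log_softmax(toy_logits, axis=-1), axis=-1)
# Built-in equivalent
builtin = tf.nn.softmax_cross_entropy_with_logits_v2(labels=toy_one_hot, logits=toy_logits)

with tf.Session() as sess:
    m, b = sess.run([manual, builtin])
    print(np.allclose(m, b))  # True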
Example #2
def create_model(bert_config,
                 is_training,
                 input_ids,
                 input_mask,
                 segment_ids,
                 labels,
                 num_labels,
                 use_one_hot_embeddings,
                 dropout_rate=1.0,
                 lstm_size=1,
                 cell='lstm',
                 num_layers=1):
    """
    创建X模型
    :param bert_config: bert 配置
    :param is_training:
    :param input_ids: 数据的idx 表示
    :param input_mask:
    :param segment_ids:
    :param labels: 标签的idx 表示
    :param num_labels: 类别数量
    :param use_one_hot_embeddings:
    :return:
    """
    # Load BertModel with the input data to obtain the per-character embeddings
    import tensorflow as tf
    from bert_base.bert import modeling
    model = modeling.BertModel(config=bert_config,
                               is_training=is_training,
                               input_ids=input_ids,
                               input_mask=input_mask,
                               token_type_ids=segment_ids,
                               use_one_hot_embeddings=use_one_hot_embeddings)
    # Sequence output (per-token embeddings): [batch_size, seq_length, embedding_size]
    # embedding Tensor("bert/encoder/Reshape_13:0", shape=(56, 202, 768), dtype=float32)
    embedding = model.get_sequence_output()
    # max_seq_length 202
    max_seq_length = embedding.shape[1].value
    # Compute the true (unpadded) sequence lengths
    # input_ids Tensor("IteratorGetNext:0", shape=(56, 202), dtype=int32)
    # tf.abs Tensor("Abs_1:0", shape=(56, 202), dtype=int32)
    # used Tensor("Sign:0", shape=(56, 202), dtype=int32)
    # lengths Tensor("Sum:0", shape=(56,), dtype=int32)  1行56列(/56行1列?)的数组   每个batch下序列的真实长度
    used = tf.sign(tf.abs(input_ids))
    lengths = tf.reduce_sum(
        used, reduction_indices=1)  # [batch_size] vector with the true sequence length of each example in the batch
    # Add the CRF output layer
    blstm_crf = BLSTM_CRF(embedded_chars=embedding,
                          hidden_unit=lstm_size,
                          cell_type=cell,
                          num_layers=num_layers,
                          dropout_rate=dropout_rate,
                          initializers=initializers,
                          num_labels=num_labels,
                          seq_length=max_seq_length,
                          labels=labels,
                          lengths=lengths,
                          is_training=is_training)
    rst = blstm_crf.add_blstm_crf_layer(crf_only=False)
    return rst
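The tf.sign(tf.abs(input_ids)) trick above recovers the unpadded length of each sequence because the padding id is 0. A toy illustration (made-up ids, assuming a TF1 session):

import tensorflow as tf

toy_ids = tf.constant([[101, 2769, 102, 0, 0],      # real length 3
                       [101, 872, 1962, 102, 0]])   # real length 4
used = tf.sign(tf.abs(toy_ids))                     # 1 for real tokens, 0 for padding
lengths = tf.reduce_sum(used, axis=1)               # per-example true lengths

with tf.Session() as sess:
    print(sess.run(lengths))  # [3 4]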
Example #3
def bert_model(input_ids, is_training):
    with tf.Session() as sess:
        # An all-ones mask attends to every position; all-zero token_type_ids marks a single segment
        input_mask = tf.ones(shape=tf.shape(input_ids), dtype=tf.int32)
        token_type_ids = tf.zeros(shape=tf.shape(input_ids), dtype=tf.int32)
        model = modeling.BertModel(config=bert_config,
                                   is_training=is_training,
                                   input_ids=input_ids,
                                   input_mask=input_mask,
                                   token_type_ids=token_type_ids,
                                   use_one_hot_embeddings=True)
        # Call init_from_checkpoint,
        # then initialize the variables
        # graph = tf.get_default_graph()
        # tvars = tf.trainable_variables()
        #
        # (assignment_map,
        #  initialized_variable_names) = modeling.get_assignment_map_from_checkpoint(
        #     tvars, init_checkpoint)
        #
        # tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

        # # Initialize all variables
        #
        # tf.logging.info("**** Trainable Variables ****")
        # for var in tvars:
        #     init_string = ""
        #     if var.name in initialized_variable_names:
        #         init_string = ", *INIT_FROM_CKPT*"
        #     tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
        #                     init_string)
        # sess.run(tf.global_variables_initializer())
        embeddings = model.get_sequence_output()
        return embeddings
Example #4
def create_model(
    bert_config,
    is_training,
    input_ids,
    input_mask,
    segment_ids,
    use_one_hot_embeddings,
):
    """
    创建X模型
    :param bert_config: bert 配置
    :param is_training:
    :param input_ids: 数据的idx 表示
    :param input_mask:
    :param segment_ids:
    :param use_one_hot_embeddings:
    :return:
    """
    # Load BertModel with the input data to obtain the per-character embeddings
    model = modeling.BertModel(config=bert_config,
                               is_training=is_training,
                               input_ids=input_ids,
                               input_mask=input_mask,
                               token_type_ids=segment_ids,
                               use_one_hot_embeddings=use_one_hot_embeddings)
    # Sequence output (per-token embeddings): [batch_size, seq_length, embedding_size]
    embedding = model.get_sequence_output()  # (batch_size, seq_length, embedding_size)
    print('Output shape of the last BERT hidden layer:', embedding.shape)
    return embedding
Example #5
def create_model(bert_config,
                 is_training,
                 input_ids,
                 input_mask,
                 segment_ids,
                 labels,
                 num_labels,
                 use_one_hot_embeddings,
                 dropout_rate=1.0,
                 lstm_size=1,
                 cell='lstm',
                 num_layers=1):
    """
    创建模型
    :param bert_config: bert配置
    :param is_training: 是否训练
    :param input_ids: 数据的idx表示
    :param input_mask:
    :param segment_ids:
    :param labels: 标签的idx表示
    :param num_labels: 类别数量
    :param use_one_hot_embeddings:
    :param dropout_rate:
    :param lstm_size:
    :param cell:
    :param num_layers:
    :return:
    """
    # Load BertModel with the input data to obtain the per-character embeddings
    model = modeling.BertModel(config=bert_config,
                               is_training=is_training,
                               input_ids=input_ids,
                               input_mask=input_mask,
                               token_type_ids=segment_ids,
                               use_one_hot_embeddings=use_one_hot_embeddings)
    # Sequence output (per-token embeddings): [batch_size, seq_length, embedding_size]
    embedding = model.get_sequence_output()
    max_seq_length = embedding.shape[1].value

    # True (unpadded) sequence lengths
    used = tf.sign(tf.abs(input_ids))
    lengths = tf.reduce_sum(
        used, reduction_indices=1)  # [batch_size] vector with the true sequence length of each example in the batch

    # Add the CRF output layer
    blstm_crf = BLSTM_CRF(embedded_chars=embedding,
                          hidden_unit=lstm_size,
                          cell_type=cell,
                          num_layers=num_layers,
                          dropout_rate=dropout_rate,
                          initializers=initializers,
                          num_labels=num_labels,
                          seq_length=max_seq_length,
                          labels=labels,
                          lengths=lengths,
                          is_training=is_training)

    res = blstm_crf.add_blstm_crf_layer(crf_only=True)
    return res
Example #6
def create_model(bert_config,
                 is_training,
                 input_ids,
                 input_mask,
                 segment_ids,
                 labels,
                 num_labels,
                 use_one_hot_embeddings,
                 dropout_rate=1.0,
                 lstm_size=1,
                 cell='lstm',
                 num_layers=1):
    """
    创建X模型
    :param bert_config: bert 配置
    :param is_training:
    :param input_ids: 数据的idx 表示
    :param input_mask:
    :param segment_ids:
    :param labels: 标签的idx 表示
    :param num_labels: 类别数量
    :param use_one_hot_embeddings:
    :return:
    """
    # Load BertModel with the input data to obtain the per-character embeddings
    import tensorflow as tf
    from bert_base.bert import modeling
    model = modeling.BertModel(config=bert_config,
                               is_training=is_training,
                               input_ids=input_ids,
                               input_mask=input_mask,
                               token_type_ids=segment_ids,
                               use_one_hot_embeddings=use_one_hot_embeddings)
    # Sequence output (per-token embeddings): [batch_size, seq_length, embedding_size]
    label_vocab = {"O": 0, "B": 1, "I": 2, "X": 3, "[CLS]": 4, "[SEP]": 5}
    SPAN_TYPE = "IOB2"
    mask = transition_mask(label_vocab, SPAN_TYPE, label_vocab["[CLS]"],
                           label_vocab["[SEP]"])

    embedding = model.get_sequence_output()
    max_seq_length = embedding.shape[1].value
    # Compute the true (unpadded) sequence lengths
    used = tf.sign(tf.abs(input_ids))
    lengths = tf.reduce_sum(
        used, reduction_indices=1)  # [batch_size] vector with the true sequence length of each example in the batch
    # Add the CRF output layer
    blstm_crf = BLSTM_CRF(embedded_chars=embedding,
                          hidden_unit=lstm_size,
                          cell_type=cell,
                          num_layers=num_layers,
                          dropout_rate=dropout_rate,
                          initializers=initializers,
                          num_labels=num_labels,
                          seq_length=max_seq_length,
                          labels=labels,
                          lengths=lengths,
                          is_training=is_training)
    rst = blstm_crf.add_blstm_crf_layer(crf_only=True)
    return rst
Example #7
def create_model(bert_config,
                 is_training,
                 input_ids,
                 input_mask,
                 segment_ids,
                 labels,
                 num_labels,
                 use_one_hot_embeddings,
                 dropout_rate=1.0,
                 lstm_size=1,
                 cell='lstm',
                 num_layers=1,
                 crf_only=False,
                 lstm_only=False):
    """
    创建X模型
    :param bert_config: bert 配置
    :param is_training:
    :param input_ids: 数据的idx 表示
    :param input_mask:
    :param segment_ids:
    :param labels: 标签的idx 表示
    :param num_labels: 类别数量
    :param use_one_hot_embeddings:
    :return:
    """
    # Load BertModel with the input data to obtain the per-character embeddings
    import tensorflow as tf
    from bert_base.bert import modeling
    model = modeling.BertModel(config=bert_config,
                               is_training=is_training,
                               input_ids=input_ids,
                               input_mask=input_mask,
                               token_type_ids=segment_ids,
                               use_one_hot_embeddings=use_one_hot_embeddings)
    # Sequence output (per-token embeddings): [batch_size, seq_length, embedding_size]
    embedding = model.get_sequence_output()
    max_seq_length = embedding.shape[1].value
    # Compute the true (unpadded) sequence lengths
    used = tf.sign(tf.abs(input_ids))
    lengths = tf.reduce_sum(
        used, reduction_indices=1)  # [batch_size] vector with the true sequence length of each example in the batch
    # Add the CRF output layer
    with tf.variable_scope('finetune'):
        blstm_crf = BLSTM_CRF(embedded_chars=embedding,
                              input_mask=input_mask,
                              hidden_unit=lstm_size,
                              cell_type=cell,
                              num_layers=num_layers,
                              dropout_rate=dropout_rate,
                              initializers=initializers,
                              num_labels=num_labels,
                              seq_length=max_seq_length,
                              labels=labels,
                              lengths=lengths,
                              is_training=is_training)
        rst = blstm_crf.add_blstm_crf_layer(crf_only=crf_only,
                                            lstm_only=lstm_only)
    return rst
Пример #8
0
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """The `model_fn` for TPUEstimator."""

        unique_ids = features["unique_ids"]
        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        input_type_ids = features["input_type_ids"]

        model = modeling.BertModel(
            config=bert_config,
            is_training=False,
            input_ids=input_ids,
            input_mask=input_mask,
            token_type_ids=input_type_ids,
            use_one_hot_embeddings=use_one_hot_embeddings)

        if mode != tf.estimator.ModeKeys.PREDICT:
            raise ValueError("Only PREDICT modes are supported: %s" % (mode))

        tvars = tf.trainable_variables()
        scaffold_fn = None
        (assignment_map, initialized_variable_names
         ) = modeling.get_assignment_map_from_checkpoint(
             tvars, init_checkpoint)
        if use_tpu:

            def tpu_scaffold():
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
                return tf.train.Scaffold()

            scaffold_fn = tpu_scaffold
        else:
            tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

        tf.logging.info("**** Trainable Variables ****")
        for var in tvars:
            init_string = ""
            if var.name in initialized_variable_names:
                init_string = ", *INIT_FROM_CKPT*"
            tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                            init_string)

        all_layers = model.get_all_encoder_layers()

        predictions = {
            "unique_id": unique_ids,
        }

        for (i, layer_index) in enumerate(layer_indexes):
            predictions["layer_output_%d" % i] = all_layers[layer_index]

        output_spec = tf.contrib.tpu.TPUEstimatorSpec(mode=mode,
                                                      predictions=predictions,
                                                      scaffold_fn=scaffold_fn)
        return output_spec
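How layer_indexes maps encoder layers to prediction keys can be seen with plain Python lists standing in for the layer tensors (hypothetical indexes, no TF required):

layer_indexes = [-1, -2]                                   # hypothetical: last two encoder layers
all_layers = ["layer_0", "layer_1", "layer_2", "layer_3"]  # stand-ins for the layer tensors
predictions = {"unique_id": [0, 1]}
for i, layer_index in enumerate(layer_indexes):
    predictions["layer_output_%d" % i] = all_layers[layer_index]
print(predictions)
# {'unique_id': [0, 1], 'layer_output_0': 'layer_3', 'layer_output_1': 'layer_2'}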
Example #9
    def bert_layer(self):
        bert_config = modeling.BertConfig.from_json_file(ARGS.bert_config)

        model = modeling.BertModel(config=bert_config,
                                   is_training=self.is_training,
                                   input_ids=self.input_ids,
                                   input_mask=self.input_mask,
                                   token_type_ids=self.segment_ids,
                                   use_one_hot_embeddings=True)
        self.embedded = model.get_sequence_output()
        self.model_inputs = tf.nn.dropout(self.embedded, self.dropout)
Example #10
def create_model(bert_config, is_training, input_ids, input_mask, segment_ids,
                 labels, num_labels, use_one_hot_embeddings):
    """
    创建X模型
    :param bert_config: bert 配置
    :param is_training:
    :param input_ids: 数据的idx 表示
    :param input_mask:
    :param segment_ids:
    :param labels: 标签的idx 表示
    :param num_labels: 类别数量
    :param use_one_hot_embeddings
    :return:
    """
    # Load BertModel with the input data to obtain the per-character embeddings
    model = modeling.BertModel(config=bert_config,
                               is_training=is_training,
                               input_ids=input_ids,
                               input_mask=input_mask,
                               token_type_ids=segment_ids,
                               use_one_hot_embeddings=use_one_hot_embeddings)
    print("获取到bert的输出")
    # Sequence output (per-token embeddings): [batch_size, seq_length, embedding_size]
    embedding = model.get_sequence_output()
    max_seq_length = embedding.shape[1].value

    used = tf.sign(tf.abs(input_ids))
    lengths = tf.reduce_sum(
        used, reduction_indices=1)  # [batch_size] vector with the true sequence length of each example in the batch
    # Feed the embeddings into the LSTM structure
    # Instantiate the BLSTM-CRF model object
    blstm_crf = BLSTM_CRF(embedded_chars=embedding,
                          hidden_unit=FLAGS.lstm_size,
                          cell_type=FLAGS.cell,
                          num_layers=FLAGS.num_layers,
                          dropout_rate=FLAGS.dropout_rate,
                          initializers=initializers,
                          num_labels=num_labels,
                          seq_length=max_seq_length,
                          labels=labels,
                          lengths=lengths,
                          is_training=is_training)
    # Get the outputs after adding our own network on top: loss, logits, trans, pred_ids
    # Since BERT is already bidirectional, the LSTM is not strictly necessary; the last BERT layer can be fed directly to the CRF for decoding (as sketched after this function)
    try:
        rst = blstm_crf.add_blstm_crf_layer()
    except Exception as e:
        print(str(e))
        raise
    return rst
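As the comment above notes, BERT is already bidirectional, so the BiLSTM can be skipped and the projected BERT outputs fed straight to a CRF. A minimal CRF-only sketch using tf.contrib.crf (TF1); the function name and shapes are illustrative, not the BLSTM_CRF internals:

import tensorflow as tf

def crf_only_layer(logits, labels, lengths, num_labels):
    # logits: [batch, max_seq_length, num_labels], e.g. a dense projection of the BERT sequence output
    trans = tf.get_variable("transitions", shape=[num_labels, num_labels],
                            initializer=tf.truncated_normal_initializer(stddev=0.02))
    log_likelihood, trans = tf.contrib.crf.crf_log_likelihood(
        inputs=logits, tag_indices=labels, sequence_lengths=lengths, transition_params=trans)
    loss = tf.reduce_mean(-log_likelihood)
    pred_ids, _ = tf.contrib.crf.crf_decode(potentials=logits, transition_params=trans,
                                            sequence_length=lengths)
    return loss, logits, trans, pred_ids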
Example #11
    def bert_layer(self):
        bert_config = modeling.BertConfig.from_json_file(self.bert_config)

        model = modeling.BertModel(
            config=bert_config,
            is_training=self.is_training,
            input_ids=self.input_ids,
            input_mask=self.input_mask,
            token_type_ids=self.segment_ids,
            use_one_hot_embeddings=False
        )
        # Per-token embedding output
        self.embedded_sentence = model.get_sequence_output()
        self.model_inputs = tf.nn.dropout(
            self.embedded_sentence, self.dropout
        )

        # Pooled (whole-sentence) embedding output
        self.embedded_pooled = model.get_pooled_output()
        self.model_inputs_1 = tf.nn.dropout(
            self.embedded_pooled, self.dropout
        )
Example #12
def optimize_bert_graph(args, logger=None):
    if not logger:
        logger = set_logger(colored('GRAPHOPT', 'cyan'), args.verbose)
    try:
        if not os.path.exists(args.model_pb_dir):
            os.mkdir(args.model_pb_dir)
        pb_file = os.path.join(args.model_pb_dir, 'bert_model.pb')
        if os.path.exists(pb_file):
            return pb_file
        # we don't need GPU for optimizing the graph
        tf = import_tf(verbose=args.verbose)
        from tensorflow.python.tools.optimize_for_inference_lib import optimize_for_inference

        config = tf.ConfigProto(device_count={'GPU': 0}, allow_soft_placement=True)

        config_fp = os.path.join(args.model_dir, args.config_name)
        init_checkpoint = os.path.join(args.tuned_model_dir or args.bert_model_dir, args.ckpt_name)
        if args.fp16:
            logger.warning('fp16 is turned on! '
                           'Note that not all CPUs/GPUs support fast fp16 instructions; '
                           'in the worst case you will see degraded performance!')
        logger.info('model config: %s' % config_fp)
        logger.info(
            'checkpoint%s: %s' % (
            ' (overridden by the fine-tuned model)' if args.tuned_model_dir else '', init_checkpoint))
        with tf.gfile.GFile(config_fp, 'r') as f:
            bert_config = modeling.BertConfig.from_dict(json.load(f))

        logger.info('build graph...')
        # input placeholders, not sure if they are friendly to XLA
        input_ids = tf.placeholder(tf.int32, (None, args.max_seq_len), 'input_ids')
        input_mask = tf.placeholder(tf.int32, (None, args.max_seq_len), 'input_mask')
        input_type_ids = tf.placeholder(tf.int32, (None, args.max_seq_len), 'input_type_ids')

        jit_scope = tf.contrib.compiler.jit.experimental_jit_scope if args.xla else contextlib.suppress

        with jit_scope():
            input_tensors = [input_ids, input_mask, input_type_ids]

            model = modeling.BertModel(
                config=bert_config,
                is_training=False,
                input_ids=input_ids,
                input_mask=input_mask,
                token_type_ids=input_type_ids,
                use_one_hot_embeddings=False)

            tvars = tf.trainable_variables()

            (assignment_map, initialized_variable_names
             ) = modeling.get_assignment_map_from_checkpoint(tvars, init_checkpoint)

            tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

            minus_mask = lambda x, m: x - tf.expand_dims(1.0 - m, axis=-1) * 1e30
            mul_mask = lambda x, m: x * tf.expand_dims(m, axis=-1)
            masked_reduce_max = lambda x, m: tf.reduce_max(minus_mask(x, m), axis=1)
            masked_reduce_mean = lambda x, m: tf.reduce_sum(mul_mask(x, m), axis=1) / (
                    tf.reduce_sum(m, axis=1, keepdims=True) + 1e-10)

            with tf.variable_scope("pooling"):
                if len(args.pooling_layer) == 1:
                    encoder_layer = model.all_encoder_layers[args.pooling_layer[0]]
                else:
                    all_layers = [model.all_encoder_layers[l] for l in args.pooling_layer]
                    encoder_layer = tf.concat(all_layers, -1)

                input_mask = tf.cast(input_mask, tf.float32)
                if args.pooling_strategy == PoolingStrategy.REDUCE_MEAN:
                    pooled = masked_reduce_mean(encoder_layer, input_mask)
                elif args.pooling_strategy == PoolingStrategy.REDUCE_MAX:
                    pooled = masked_reduce_max(encoder_layer, input_mask)
                elif args.pooling_strategy == PoolingStrategy.REDUCE_MEAN_MAX:
                    pooled = tf.concat([masked_reduce_mean(encoder_layer, input_mask),
                                        masked_reduce_max(encoder_layer, input_mask)], axis=1)
                elif args.pooling_strategy == PoolingStrategy.FIRST_TOKEN or \
                        args.pooling_strategy == PoolingStrategy.CLS_TOKEN:
                    pooled = tf.squeeze(encoder_layer[:, 0:1, :], axis=1)
                elif args.pooling_strategy == PoolingStrategy.LAST_TOKEN or \
                        args.pooling_strategy == PoolingStrategy.SEP_TOKEN:
                    seq_len = tf.cast(tf.reduce_sum(input_mask, axis=1), tf.int32)
                    rng = tf.range(0, tf.shape(seq_len)[0])
                    indexes = tf.stack([rng, seq_len - 1], 1)
                    pooled = tf.gather_nd(encoder_layer, indexes)
                elif args.pooling_strategy == PoolingStrategy.NONE:
                    pooled = mul_mask(encoder_layer, input_mask)
                else:
                    raise NotImplementedError()

            if args.fp16:
                pooled = tf.cast(pooled, tf.float16)

            pooled = tf.identity(pooled, 'final_encodes')
            output_tensors = [pooled]
            tmp_g = tf.get_default_graph().as_graph_def()

        with tf.Session(config=config) as sess:
            logger.info('load parameters from checkpoint...')

            sess.run(tf.global_variables_initializer())
            dtypes = [n.dtype for n in input_tensors]
            logger.info('optimize...')
            tmp_g = optimize_for_inference(
                tmp_g,
                [n.name[:-2] for n in input_tensors],
                [n.name[:-2] for n in output_tensors],
                [dtype.as_datatype_enum for dtype in dtypes],
                False)

            logger.info('freeze...')
            tmp_g = convert_variables_to_constants(sess, tmp_g, [n.name[:-2] for n in output_tensors],
                                                   use_fp16=args.fp16)

        logger.info('write graph to: %s' % pb_file)
        with tf.gfile.GFile(pb_file, 'wb') as f:
            f.write(tmp_g.SerializeToString())
    except Exception:
        logger.error('fail to optimize the graph!', exc_info=True)
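The masked_reduce_mean used above averages only over non-padding positions. A toy check (made-up values, assuming a TF1 session):

import tensorflow as tf

x = tf.constant([[[1.0, 1.0], [3.0, 3.0], [100.0, 100.0]]])  # [batch=1, seq_len=3, dim=2]
m = tf.constant([[1.0, 1.0, 0.0]])                           # last position is padding

mul_mask = lambda x, m: x * tf.expand_dims(m, axis=-1)
masked_reduce_mean = lambda x, m: tf.reduce_sum(mul_mask(x, m), axis=1) / (
        tf.reduce_sum(m, axis=1, keepdims=True) + 1e-10)

with tf.Session() as sess:
    print(sess.run(masked_reduce_mean(x, m)))  # [[2. 2.]] -- the padded 100s are ignored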
Example #13
graph = tf.get_default_graph()
with graph.as_default():
    print("going to restore checkpoint")
    #sess.run(tf.global_variables_initializer())
    input_ids_p = tf.placeholder(tf.int32, [batch_size, MAX_SEQ_LENGTH],
                                 name="input_ids")
    input_mask_p = tf.placeholder(tf.int32, [batch_size, MAX_SEQ_LENGTH],
                                  name="input_mask")

    bert_config = modeling.BertConfig.from_json_file(
        os.path.join(bert_dir, 'bert_config.json'))

    model = modeling.BertModel(
        config=bert_config,
        is_training=is_training,
        input_ids=input_ids_p,
        input_mask=input_mask_p,
        token_type_ids=None,  # defaults to a single-sentence (single-segment) classification task
        use_one_hot_embeddings=use_one_hot_embeddings)

    # embedding = model.get_pooled_output()

    output_layer = model.get_pooled_output()

    hidden_size = output_layer.shape[-1].value

    output_weights = tf.get_variable(
        "output_weights", [2, hidden_size],
        initializer=tf.truncated_normal_initializer(stddev=0.02))

    output_bias = tf.get_variable("output_bias", [2],
                                  initializer=tf.zeros_initializer())
Example #14
def create_model(bert_config,
                 is_training,
                 input_ids,
                 input_mask,
                 segment_ids,
                 labels,
                 num_labels,
                 use_one_hot_embeddings,
                 pos_ids,
                 dropout_rate=1.0,
                 lstm_size=1,
                 cell='lstm',
                 num_layers=1,
                 add_pos_embedding=True):
    """
    创建X模型
    :param bert_config: bert 配置
    :param is_training:
    :param input_ids: 数据的idx 表示
    :param input_mask:
    :param segment_ids:
    :param pos_ids: 词性的idx 表示
    :param labels: 标签的idx 表示
    :param num_labels: 类别数量
    :param use_one_hot_embeddings:
    :return:
    """
    # Load BertModel with the input data to obtain the per-character embeddings
    import tensorflow as tf
    from bert_base.bert import modeling
    model = modeling.BertModel(config=bert_config,
                               is_training=is_training,
                               input_ids=input_ids,
                               input_mask=input_mask,
                               token_type_ids=segment_ids,
                               use_one_hot_embeddings=use_one_hot_embeddings)
    # Sequence output (per-token embeddings): [batch_size, seq_length, embedding_size]
    embedding = model.get_sequence_output()
    # jzhang add
    if add_pos_embedding:
        embedding = embedding_addpos(embedding, pos_ids)
    max_seq_length = embedding.shape[1].value
    # Compute the true (unpadded) sequence lengths
    used = tf.sign(tf.abs(input_ids))
    lengths = tf.reduce_sum(
        used, reduction_indices=1)  # [batch_size] vector with the true sequence length of each example in the batch
    # Add the CRF output layer
    blstm_crf = BLSTM_CRF(embedded_chars=embedding,
                          hidden_unit=lstm_size,
                          cell_type=cell,
                          num_layers=num_layers,
                          dropout_rate=dropout_rate,
                          initializers=initializers,
                          num_labels=num_labels,
                          seq_length=max_seq_length,
                          labels=labels,
                          lengths=lengths,
                          is_training=is_training)
    # jzhang: setting crf_only=True makes the model decode with the CRF only;
    #  with crf_only=False the model uses BiLSTM-CRF as the decoder
    rst = blstm_crf.add_blstm_crf_layer(crf_only=False)
    return rst
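embedding_addpos is not shown in this snippet; a hypothetical sketch of such a helper (names, table size, and dimensions are assumptions) would look up a trainable POS-tag embedding and concatenate it to the BERT token embeddings:

import tensorflow as tf

def embedding_addpos_sketch(embedding, pos_ids, pos_vocab_size=64, pos_dim=32):
    # embedding: [batch, seq_len, hidden];  pos_ids: [batch, seq_len] POS-tag indices
    pos_table = tf.get_variable("pos_embedding", [pos_vocab_size, pos_dim],
                                initializer=tf.truncated_normal_initializer(stddev=0.02))
    pos_embedding = tf.nn.embedding_lookup(pos_table, pos_ids)   # [batch, seq_len, pos_dim]
    return tf.concat([embedding, pos_embedding], axis=-1)        # [batch, seq_len, hidden + pos_dim]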
Example #15
bert_path = '/home/ywd/tf_model/pre_training_model/chinese_L-12_H-768_A-12/'
init_checkpoint = os.path.join(bert_path, 'bert_model.ckpt')
bert_config_file = os.path.join(bert_path, 'bert_config.json')
vocab_file = os.path.join(bert_path, 'vocab.txt')

bert_config = modeling.BertConfig.from_json_file(bert_config_file)

with tf.Session() as sess:
    input_ids = tf.placeholder(tf.int32, shape=[20, 128])
    input_mask = tf.placeholder(tf.int32, shape=[20, 128])
    token_type_ids= tf.placeholder(tf.int32, shape=[20, 128])
    model = modeling.BertModel(
            config=bert_config,
            is_training=True,
            input_ids=input_ids,
            input_mask=input_mask,
            token_type_ids=token_type_ids,
            use_one_hot_embeddings=False
        )
    # Call init_from_checkpoint,
    # then initialize the variables
    graph = tf.get_default_graph()
    tvars = tf.trainable_variables()

    (assignment_map,
     initialized_variable_names) = modeling.get_assignment_map_from_checkpoint(
        tvars, init_checkpoint)

    tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

    # Initialize all variables
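    # Sketch of an assumed continuation (not in the original snippet): run the
    # initializer and fetch the sequence output with dummy inputs that match the
    # [20, 128] placeholders above.
    import numpy as np
    sess.run(tf.global_variables_initializer())
    sequence_output = model.get_sequence_output()
    feed = {input_ids: np.zeros((20, 128), dtype=np.int32),
            input_mask: np.ones((20, 128), dtype=np.int32),
            token_type_ids: np.zeros((20, 128), dtype=np.int32)}
    print(sess.run(sequence_output, feed_dict=feed).shape)  # (20, 128, 768)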