def create_model(args, ernie_config):
    """Build the ERNIE inference graph and return its feed vars and outputs.

    Returns:
        (inputs, [seq_out, cls_feats]): the five feed variables and the
        renamed sequence / pooled-classification outputs.
    """
    field_specs = [
        ("src_ids", 'int64'),
        ("sent_ids", 'int64'),
        ("pos_ids", 'int64'),
        ("task_ids", 'int64'),
        ("input_mask", 'float32'),
    ]

    inputs = []
    for field_name, field_dtype in field_specs:
        inputs.append(
            fluid.data(field_name, [-1, args.max_seq_len, 1],
                       dtype=field_dtype))
    src_ids, sent_ids, pos_ids, task_ids, input_mask = inputs

    ernie = ErnieModel(
        src_ids=src_ids,
        position_ids=pos_ids,
        sentence_ids=sent_ids,
        task_ids=task_ids,
        input_mask=input_mask,
        config=ernie_config,
        use_fp16=args.use_fp16)

    seq_out = ernie.get_sequence_output()
    cls_feats = ernie.get_pooled_output()
    # Identity scale ops exist only to give the outputs readable names:
    # save_inference_model would otherwise emit opaque names such as
    # 'save_infer_model/scale_1'.
    seq_out = fluid.layers.scale(
        seq_out, scale=1.0, name='ernie_sequence_latent')
    cls_feats = fluid.layers.scale(
        cls_feats, scale=1.0, name='ernie_classification')

    for idx, var in enumerate(inputs):
        print(f'input[{idx}]:', var.name, var.shape, var.dtype)
    print('sequence_output  :', seq_out.name, seq_out.shape, seq_out.dtype)
    print('classifier_output:', cls_feats.name, cls_feats.shape,
          cls_feats.dtype)
    return inputs, [seq_out, cls_feats]
# Exemplo n.º 2
# 0
def create_model(args, pyreader_name, ernie_config):
    """Build an ERNIE feature-extraction graph fed by a py_reader.

    Returns:
        (pyreader, graph_vars): graph_vars exposes the pooled [CLS]
        embedding and the unpadded top-layer token embeddings.
    """
    max_len = args.max_seq_len
    pyreader = fluid.layers.py_reader(
        capacity=50,
        shapes=[[-1, max_len, 1], [-1, max_len, 1], [-1, max_len, 1],
                [-1, max_len, 1], [-1, 1]],
        dtypes=['int64', 'int64', 'int64', 'float', 'int64'],
        lod_levels=[0] * 5,
        name=pyreader_name,
        use_double_buffer=True)

    src_ids, sent_ids, pos_ids, input_mask, seq_lens = \
        fluid.layers.read_file(pyreader)

    ernie = ErnieModel(
        src_ids=src_ids,
        position_ids=pos_ids,
        sentence_ids=sent_ids,
        input_mask=input_mask,
        config=ernie_config)

    enc_out = ernie.get_sequence_output()
    unpad_enc_out = fluid.layers.sequence_unpad(enc_out, length=seq_lens)
    cls_feats = ernie.get_pooled_output()

    # persistable=True stops the memory optimizer from reusing these
    # buffers before they can be fetched.
    for var in (enc_out, unpad_enc_out, cls_feats):
        var.persistable = True

    return pyreader, {
        "cls_embeddings": cls_feats,
        "top_layer_embeddings": unpad_enc_out,
    }
# Exemplo n.º 3
# 0
def create_model(args, pyreader_name, ernie_config, is_prediction=False):
    """Build an ERNIE sequence-labeling graph (softmax over each token).

    Returns:
        (pyreader, graph_vars): graph_vars holds the mean loss, per-token
        probabilities, flattened labels/predictions and sequence lengths.
    """
    max_len = args.max_seq_len
    pyreader = fluid.layers.py_reader(
        capacity=50,
        shapes=[[-1, max_len, 1], [-1, max_len, 1], [-1, max_len, 1],
                [-1, max_len, max_len], [-1, max_len, 1], [-1, 1]],
        dtypes=['int64', 'int64', 'int64', 'float', 'int64', 'int64'],
        lod_levels=[0] * 6,
        name=pyreader_name,
        use_double_buffer=True)

    (src_ids, sent_ids, pos_ids, self_attn_mask, labels,
     seq_lens) = fluid.layers.read_file(pyreader)

    ernie = ErnieModel(
        src_ids=src_ids,
        position_ids=pos_ids,
        sentence_ids=sent_ids,
        self_attn_mask=self_attn_mask,
        config=ernie_config,
        use_fp16=args.use_fp16)

    enc_out = ernie.get_sequence_output()
    # Token-level classifier head shared across positions.
    logits = fluid.layers.fc(
        input=enc_out,
        size=args.num_labels,
        num_flatten_dims=2,
        param_attr=fluid.ParamAttr(
            name="cls_seq_label_out_w",
            initializer=fluid.initializer.TruncatedNormal(scale=0.02)),
        bias_attr=fluid.ParamAttr(
            name="cls_seq_label_out_b",
            initializer=fluid.initializer.Constant(0.)))

    ret_labels = fluid.layers.reshape(x=labels, shape=[-1, 1])
    ret_infers = fluid.layers.reshape(
        x=fluid.layers.argmax(logits, axis=2), shape=[-1, 1])

    labels = fluid.layers.flatten(labels, axis=2)
    ce_loss, probs = fluid.layers.softmax_with_cross_entropy(
        logits=fluid.layers.flatten(logits, axis=2),
        label=labels,
        return_softmax=True)
    loss = fluid.layers.mean(x=ce_loss)

    # Scale the loss under fp16 training to avoid gradient underflow.
    if args.use_fp16 and args.loss_scaling > 1.0:
        loss *= args.loss_scaling

    graph_vars = {
        "loss": loss,
        "probs": probs,
        "labels": ret_labels,
        "infers": ret_infers,
        "seq_lens": seq_lens
    }
    # Keep fetch targets from being memory-optimized away.
    for var in graph_vars.values():
        var.persistable = True

    return pyreader, graph_vars
# Exemplo n.º 4
# 0
    def forward(self, features):
        """Run ERNIE over a batch and return per-token label logits.

        Args:
            features: tuple of (src_ids, sent_ids, input_seqlen) tensors
                produced by the input pipeline.

        Returns:
            (logits, input_seqlen): logits are token-level scores from an
            FC head of width ``self.num_label``.
        """
        src_ids, sent_ids, input_seqlen = features
        zero = L.fill_constant([1], dtype='int64', value=0)
        # NOTE(review): this mask is 1 where src_ids == 0 (the assumed pad
        # id) and 0 on real tokens — confirm that ErnieModel expects the
        # mask in this polarity.
        input_mask = L.cast(L.equal(src_ids, zero),
                            'float32')  # assume pad id == 0
        #input_mask = L.unsqueeze(input_mask, axes=[2])
        d_shape = L.shape(src_ids)
        seqlen = d_shape[1]
        batch_size = d_shape[0]
        # Position ids: [0..seqlen) tiled over the batch, cast to int64,
        # with a trailing singleton dim to match src_ids' layout.
        pos_ids = L.unsqueeze(L.range(0, seqlen, 1, dtype='int32'), axes=[0])
        pos_ids = L.expand(pos_ids, [batch_size, 1])
        pos_ids = L.unsqueeze(pos_ids, axes=[2])
        pos_ids = L.cast(pos_ids, 'int64')
        pos_ids.stop_gradient = True
        input_mask.stop_gradient = True
        # Constant per-token task id; currently unused downstream.
        task_ids = L.zeros_like(
            src_ids) + self.hparam.task_id  # constant task id; unused for now
        task_ids.stop_gradient = True

        model = ErnieModel(src_ids=src_ids,
                           position_ids=pos_ids,
                           sentence_ids=sent_ids,
                           task_ids=task_ids,
                           input_mask=input_mask,
                           config=self.hparam,
                           use_fp16=self.hparam['use_fp16'])

        enc_out = model.get_sequence_output()
        # Token-level classifier head over the encoder output.
        logits = L.fc(
            input=enc_out,
            size=self.num_label,
            num_flatten_dims=2,
            param_attr=F.ParamAttr(
                name="cls_seq_label_out_w",
                initializer=F.initializer.TruncatedNormal(scale=0.02)),
            bias_attr=F.ParamAttr(name="cls_seq_label_out_b",
                                  initializer=F.initializer.Constant(0.)))

        propeller.summary.histogram('pred', logits)

        return logits, input_seqlen
# Exemplo n.º 5
# 0
def create_model(args, pyreader_name, ernie_config, is_prediction=False):
    """Build an ERNIE sequence-labeling graph fed through a DataLoader.

    A token-level FC head produces per-label logits; chunk_eval counters
    (num_infer / num_label / num_correct) are exposed so the caller can
    compute precision/recall/F1.

    Returns:
        (pyreader, graph_vars): the DataLoader and a dict of fetch targets.
    """
    # Feed variables use positional string names; note there is no feed
    # named '6' in this layout.
    src_ids = fluid.layers.data(name='1',
                                shape=[-1, args.max_seq_len, 1],
                                dtype='int64')
    sent_ids = fluid.layers.data(name='2',
                                 shape=[-1, args.max_seq_len, 1],
                                 dtype='int64')
    pos_ids = fluid.layers.data(name='3',
                                shape=[-1, args.max_seq_len, 1],
                                dtype='int64')
    task_ids = fluid.layers.data(name='4',
                                 shape=[-1, args.max_seq_len, 1],
                                 dtype='int64')
    input_mask = fluid.layers.data(name='5',
                                   shape=[-1, args.max_seq_len, 1],
                                   dtype='float32')
    labels = fluid.layers.data(name='7',
                               shape=[-1, args.max_seq_len, 1],
                               dtype='int64')
    seq_lens = fluid.layers.data(name='8', shape=[-1], dtype='int64')

    # Non-iterable loader: the training loop starts/resets it explicitly.
    pyreader = fluid.io.DataLoader.from_generator(feed_list=[
        src_ids, sent_ids, pos_ids, task_ids, input_mask, labels, seq_lens
    ],
                                                  capacity=70,
                                                  iterable=False)

    ernie = ErnieModel(src_ids=src_ids,
                       position_ids=pos_ids,
                       sentence_ids=sent_ids,
                       task_ids=task_ids,
                       input_mask=input_mask,
                       config=ernie_config,
                       use_fp16=args.use_fp16)

    enc_out = ernie.get_sequence_output()
    # upscale_in_train leaves inference outputs unscaled.
    enc_out = fluid.layers.dropout(x=enc_out,
                                   dropout_prob=0.1,
                                   dropout_implementation="upscale_in_train")
    logits = fluid.layers.fc(
        input=enc_out,
        size=args.num_labels,
        num_flatten_dims=2,
        param_attr=fluid.ParamAttr(
            name="cls_seq_label_out_w",
            initializer=fluid.initializer.TruncatedNormal(scale=0.02)),
        bias_attr=fluid.ParamAttr(name="cls_seq_label_out_b",
                                  initializer=fluid.initializer.Constant(0.)))
    infers = fluid.layers.argmax(logits, axis=2)

    ret_infers = fluid.layers.reshape(x=infers, shape=[-1, 1])
    # Strip padding so chunk_eval sees true-length label/pred sequences.
    lod_labels = fluid.layers.sequence_unpad(labels, seq_lens)
    lod_infers = fluid.layers.sequence_unpad(infers, seq_lens)

    # num_chunk_types: (num_labels - 1) tag labels divided by the number of
    # per-chunk tag prefixes (len(scheme) - 1), e.g. for an IOB scheme.
    (_, _, _, num_infer, num_label, num_correct) = fluid.layers.chunk_eval(
        input=lod_infers,
        label=lod_labels,
        chunk_scheme=args.chunk_scheme,
        num_chunk_types=((args.num_labels - 1) //
                         (len(args.chunk_scheme) - 1)))

    labels = fluid.layers.flatten(labels, axis=2)
    ce_loss, probs = fluid.layers.softmax_with_cross_entropy(
        logits=fluid.layers.flatten(logits, axis=2),
        label=labels,
        return_softmax=True)
    input_mask = fluid.layers.flatten(input_mask, axis=2)
    # Zero out loss at padding positions. NOTE(review): the mean below
    # still divides by the total token count (pads included), so the loss
    # scale varies with the amount of padding — confirm this is intended.
    ce_loss = ce_loss * input_mask
    loss = fluid.layers.mean(x=ce_loss)

    graph_vars = {
        "inputs": src_ids,
        "loss": loss,
        "probs": probs,
        "seqlen": seq_lens,
        "num_infer": num_infer,
        "num_label": num_label,
        "num_correct": num_correct,
    }

    # persistable=True keeps fetch targets from being memory-optimized away.
    for k, v in graph_vars.items():
        v.persistable = True

    return pyreader, graph_vars
# Exemplo n.º 6
# 0
def create_model(args, pyreader_name, ernie_config, is_training):
    """Build an ERNIE machine-reading-comprehension (span) graph.

    Predicts per-token start/end logits and averages the two boundary
    cross-entropy losses.

    Returns:
        (pyreader, graph_vars): the reader and a dict of fetch targets.
    """
    max_len = args.max_seq_len
    pyreader = fluid.layers.py_reader(
        capacity=50,
        shapes=[[-1, max_len, 1], [-1, max_len, 1], [-1, max_len, 1],
                [-1, max_len, 1], [-1, max_len, 1], [-1, 1], [-1, 1],
                [-1, 1]],
        dtypes=['int64', 'int64', 'int64', 'int64', 'float32', 'int64',
                'int64', 'int64'],
        lod_levels=[0] * 8,
        name=pyreader_name,
        use_double_buffer=True)
    (src_ids, sent_ids, pos_ids, task_ids, input_mask, start_positions,
     end_positions, unique_id) = fluid.layers.read_file(pyreader)

    ernie = ErnieModel(
        src_ids=src_ids,
        position_ids=pos_ids,
        sentence_ids=sent_ids,
        task_ids=task_ids,
        input_mask=input_mask,
        config=ernie_config,
        use_fp16=args.use_fp16)

    enc_out = fluid.layers.dropout(
        x=ernie.get_sequence_output(),
        dropout_prob=0.1,
        dropout_implementation="upscale_in_train")

    # Two logits per token: start-of-span and end-of-span scores.
    logits = fluid.layers.fc(
        input=enc_out,
        size=2,
        num_flatten_dims=2,
        param_attr=fluid.ParamAttr(
            name="cls_mrc_out_w",
            initializer=fluid.initializer.TruncatedNormal(scale=0.02)),
        bias_attr=fluid.ParamAttr(
            name="cls_mrc_out_b",
            initializer=fluid.initializer.Constant(0.)))

    # [batch, seq, 2] -> [2, batch, seq]; peel off start and end logits.
    logits = fluid.layers.transpose(x=logits, perm=[2, 0, 1])
    start_logits, end_logits = fluid.layers.unstack(x=logits, axis=0)

    # Count sequences in the batch by summing a per-sample constant 1.
    batch_ones = fluid.layers.fill_constant_batch_size_like(
        input=start_logits, dtype='int64', shape=[1], value=1)
    num_seqs = fluid.layers.reduce_sum(input=batch_ones)

    def _mean_ce(boundary_logits, positions):
        """Mean softmax cross-entropy for one span boundary."""
        ce = fluid.layers.softmax_with_cross_entropy(
            logits=boundary_logits, label=positions)
        return fluid.layers.mean(x=ce)

    loss = (_mean_ce(start_logits, start_positions) +
            _mean_ce(end_logits, end_positions)) / 2.0
    # Scale the loss under fp16 training to avoid gradient underflow.
    if args.use_fp16 and args.loss_scaling > 1.0:
        loss *= args.loss_scaling

    graph_vars = {
        "loss": loss,
        "num_seqs": num_seqs,
        "unique_id": unique_id,
        "start_logits": start_logits,
        "end_logits": end_logits
    }
    for var in graph_vars.values():
        var.persistable = True

    return pyreader, graph_vars
def create_model(ernie_config, is_training=False):
    """Build an ERNIE SQuAD-style span-extraction graph.

    Training graphs consume gold start/end positions and return the mean
    start/end cross-entropy; inference graphs consume a unique_id and
    return the raw start/end logits for post-processing.

    NOTE(review): `args.use_fp16` below reads a module-level `args` that is
    not a parameter of this function — confirm it exists in the defining
    module.

    Returns:
        (data_loader, total_loss, num_seqs) when is_training,
        otherwise (data_loader, unique_id, start_logits, end_logits,
        num_seqs).
    """
    if is_training:
        input_fields = {
            'names': [
                'src_ids', 'pos_ids', 'sent_ids', 'input_mask',
                'start_positions', 'end_positions'
            ],
            'shapes': [[None, None], [None, None], [None, None],
                       [None, None, 1], [None, 1], [None, 1]],
            'dtypes': ['int64', 'int64', 'int64', 'float32', 'int64', 'int64'],
            'lod_levels': [0, 0, 0, 0, 0, 0],
        }
    else:
        input_fields = {
            'names':
            ['src_ids', 'pos_ids', 'sent_ids', 'input_mask', 'unique_id'],
            'shapes': [[None, None], [None, None], [None, None],
                       [None, None, 1], [None, 1]],
            'dtypes': ['int64', 'int64', 'int64', 'float32', 'int64'],
            'lod_levels': [0, 0, 0, 0, 0],
        }

    inputs = [
        fluid.data(name=input_fields['names'][i],
                   shape=input_fields['shapes'][i],
                   dtype=input_fields['dtypes'][i],
                   lod_level=input_fields['lod_levels'][i])
        for i in range(len(input_fields['names']))
    ]

    # Non-iterable loader: the training loop starts/resets it explicitly.
    data_loader = fluid.io.DataLoader.from_generator(feed_list=inputs,
                                                     capacity=50,
                                                     iterable=False)

    if is_training:
        (src_ids, pos_ids, sent_ids, input_mask, start_positions,
         end_positions) = inputs
    else:
        (src_ids, pos_ids, sent_ids, input_mask, unique_id) = inputs

    ernie = ErnieModel(src_ids=src_ids,
                       position_ids=pos_ids,
                       sentence_ids=sent_ids,
                       input_mask=input_mask,
                       config=ernie_config,
                       use_fp16=args.use_fp16)

    enc_out = ernie.get_sequence_output()

    # Two logits per token: start-of-span and end-of-span scores.
    logits = fluid.layers.fc(
        input=enc_out,
        size=2,
        num_flatten_dims=2,
        param_attr=fluid.ParamAttr(
            name="cls_squad_out_w",
            initializer=fluid.initializer.TruncatedNormal(scale=0.02)),
        bias_attr=fluid.ParamAttr(name="cls_squad_out_b",
                                  initializer=fluid.initializer.Constant(0.)))

    # [batch, seq, 2] -> [2, batch, seq], then split into start/end logits.
    logits = fluid.layers.transpose(x=logits, perm=[2, 0, 1])
    start_logits, end_logits = fluid.layers.unstack(x=logits, axis=0)

    # Count sequences in the batch by summing a per-sample constant 1.
    batch_ones = fluid.layers.fill_constant_batch_size_like(input=start_logits,
                                                            dtype='int64',
                                                            shape=[1],
                                                            value=1)
    num_seqs = fluid.layers.reduce_sum(input=batch_ones)

    if is_training:

        def compute_loss(logits, positions):
            """Mean softmax cross-entropy for one span boundary."""
            loss = fluid.layers.softmax_with_cross_entropy(logits=logits,
                                                           label=positions)
            loss = fluid.layers.mean(x=loss)
            return loss

        start_loss = compute_loss(start_logits, start_positions)
        end_loss = compute_loss(end_logits, end_positions)
        total_loss = (start_loss + end_loss) / 2.0
        return data_loader, total_loss, num_seqs
    else:
        return data_loader, unique_id, start_logits, end_logits, num_seqs
# Exemplo n.º 8
# 0
def create_model(args, pyreader_name, ernie_config, is_prediction=False):
    """Build an ERNIE sequence-labeling graph fed by a py_reader.

    A token-level FC head produces per-label logits; chunk_eval counters
    are wired in for F1 computation and a padding-masked cross-entropy is
    averaged into the loss.

    Returns:
        (pyreader, graph_vars): the reader and a dict of fetch targets.
    """
    pyreader = fluid.layers.py_reader(capacity=50,
                                      shapes=[[-1, args.max_seq_len, 1],
                                              [-1, args.max_seq_len, 1],
                                              [-1, args.max_seq_len, 1],
                                              [-1, args.max_seq_len, 1],
                                              [-1, args.max_seq_len, 1],
                                              [-1, args.max_seq_len, 1],
                                              [-1, 1]],
                                      dtypes=[
                                          'int64', 'int64', 'int64', 'int64',
                                          'float32', 'int64', 'int64'
                                      ],
                                      lod_levels=[0, 0, 0, 0, 0, 0, 0],
                                      name=pyreader_name,
                                      use_double_buffer=True)

    (src_ids, sent_ids, pos_ids, task_ids, input_mask, labels,
     seq_lens) = fluid.layers.read_file(pyreader)

    ernie = ErnieModel(src_ids=src_ids,
                       position_ids=pos_ids,
                       sentence_ids=sent_ids,
                       task_ids=task_ids,
                       input_mask=input_mask,
                       config=ernie_config,
                       use_fp16=args.use_fp16)

    enc_out = ernie.get_sequence_output()
    # upscale_in_train leaves inference outputs unscaled.
    enc_out = fluid.layers.dropout(x=enc_out,
                                   dropout_prob=0.1,
                                   dropout_implementation="upscale_in_train")
    logits = fluid.layers.fc(
        input=enc_out,
        size=args.num_labels,
        num_flatten_dims=2,
        param_attr=fluid.ParamAttr(
            name="cls_seq_label_out_w",
            initializer=fluid.initializer.TruncatedNormal(scale=0.02)),
        bias_attr=fluid.ParamAttr(name="cls_seq_label_out_b",
                                  initializer=fluid.initializer.Constant(0.)))
    infers = fluid.layers.argmax(logits, axis=2)

    ret_labels = fluid.layers.reshape(x=labels, shape=[-1, 1])
    ret_infers = fluid.layers.reshape(x=infers, shape=[-1, 1])

    # Strip padding so chunk_eval sees true-length label/pred sequences.
    lod_labels = fluid.layers.sequence_unpad(labels, seq_lens)
    lod_infers = fluid.layers.sequence_unpad(infers, seq_lens)

    # num_chunk_types: (num_labels - 1) tag labels divided by the number of
    # per-chunk tag prefixes (len(scheme) - 1), e.g. for an IOB scheme.
    (_, _, _, num_infer, num_label, num_correct) = fluid.layers.chunk_eval(
        input=lod_infers,
        label=lod_labels,
        chunk_scheme=args.chunk_scheme,
        num_chunk_types=((args.num_labels - 1) //
                         (len(args.chunk_scheme) - 1)))

    labels = fluid.layers.flatten(labels, axis=2)
    ce_loss, probs = fluid.layers.softmax_with_cross_entropy(
        logits=fluid.layers.flatten(logits, axis=2),
        label=labels,
        return_softmax=True)
    input_mask = fluid.layers.flatten(input_mask, axis=2)
    # Zero out loss at padding positions. NOTE(review): the mean below
    # still divides by the total token count (pads included) — confirm
    # this is intended.
    ce_loss = ce_loss * input_mask
    loss = fluid.layers.mean(x=ce_loss)

    # Scale the loss under fp16 training to avoid gradient underflow.
    if args.use_fp16 and args.loss_scaling > 1.0:
        loss *= args.loss_scaling

    graph_vars = {
        "loss": loss,
        "probs": probs,
        "labels": ret_labels,
        "infers": ret_infers,
        "num_infer": num_infer,
        "num_label": num_label,
        "num_correct": num_correct,
        "seq_lens": seq_lens
    }

    # persistable=True keeps fetch targets from being memory-optimized away.
    for k, v in graph_vars.items():
        v.persistable = True

    return pyreader, graph_vars
# Exemplo n.º 9
# 0
def create_model(args, pyreader_name, ernie_config):
    """Build a multi-label (sigmoid) token-tagging graph on ERNIE.

    Each token gets `num_labels` independent sigmoid scores; the loss is a
    padding-masked binary cross-entropy. Unpadded logits/labels plus
    token-to-original-text index maps are exposed for decoding.

    Returns:
        (pyreader, graph_vars): the reader and a dict of fetch targets.
    """
    pyreader = fluid.layers.py_reader(
        capacity=50,  # buffer capacity (number of items)
        shapes=[[-1, args.max_seq_len, 1], [-1, args.max_seq_len, 1],
                [-1, args.max_seq_len, 1], [-1, args.max_seq_len, 1],
                [-1, args.max_seq_len, 1],
                [-1, args.max_seq_len, args.num_labels], [-1, 1], [-1, 1],
                [-1, args.max_seq_len, 1], [-1, args.max_seq_len, 1]],
        dtypes=[
            'int64', 'int64', 'int64', 'int64', 'float32', 'float32', 'int64',
            'int64', 'int64', 'int64'
        ],
        lod_levels=[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        name=pyreader_name,
        use_double_buffer=True)

    (src_ids, sent_ids, pos_ids, task_ids, input_mask, labels, seq_lens,
     example_index, tok_to_orig_start_index,
     tok_to_orig_end_index) = fluid.layers.read_file(pyreader)

    ernie = ErnieModel(src_ids=src_ids,
                       position_ids=pos_ids,
                       sentence_ids=sent_ids,
                       task_ids=task_ids,
                       input_mask=input_mask,
                       config=ernie_config,
                       use_fp16=args.use_fp16)
    # embedding + transformer encoder stack

    enc_out = ernie.get_sequence_output()
    # top-layer token representations
    enc_out = fluid.layers.dropout(x=enc_out,
                                   dropout_prob=0.1,
                                   dropout_implementation="upscale_in_train")
    logits = fluid.layers.fc(
        input=enc_out,
        size=args.num_labels,
        num_flatten_dims=2,
        param_attr=fluid.ParamAttr(
            name="cls_seq_label_out_w",
            initializer=fluid.initializer.TruncatedNormal(scale=0.02)),
        bias_attr=fluid.ParamAttr(name="cls_seq_label_out_b",
                                  initializer=fluid.initializer.Constant(0.)))
    # Independent per-label probabilities (multi-label setting).
    logits = fluid.layers.sigmoid(logits)

    # Strip padding; the unpadded tensors keep true sequence lengths.
    lod_labels = fluid.layers.sequence_unpad(labels, seq_lens)
    lod_logits = fluid.layers.sequence_unpad(logits, seq_lens)
    lod_tok_to_orig_start_index = fluid.layers.sequence_unpad(
        tok_to_orig_start_index, seq_lens)
    lod_tok_to_orig_end_index = fluid.layers.sequence_unpad(
        tok_to_orig_end_index, seq_lens)

    labels = fluid.layers.flatten(labels, axis=2)
    logits = fluid.layers.flatten(logits, axis=2)
    input_mask = fluid.layers.flatten(input_mask, axis=2)

    # calculate loss: hand-rolled binary cross-entropy over the sigmoids.
    # NOTE(review): log(logits) / log(1 - logits) is unguarded; a sigmoid
    # output saturating at exactly 0 or 1 would yield -inf — confirm
    # upstream clipping or that saturation cannot occur here.
    log_logits = fluid.layers.log(logits)
    log_logits_neg = fluid.layers.log(1 - logits)
    ce_loss = 0. - labels * log_logits - (1 - labels) * log_logits_neg

    # Average over the label dimension, then mask out padding tokens.
    ce_loss = fluid.layers.reduce_mean(ce_loss, dim=1, keep_dim=True)
    ce_loss = ce_loss * input_mask
    loss = fluid.layers.mean(x=ce_loss)

    graph_vars = {
        "inputs": src_ids,
        "loss": loss,
        "seqlen": seq_lens,
        "lod_logit": lod_logits,
        "lod_label": lod_labels,
        "example_index": example_index,
        "tok_to_orig_start_index": lod_tok_to_orig_start_index,
        "tok_to_orig_end_index": lod_tok_to_orig_end_index
    }

    # persistable=True keeps fetch targets from being memory-optimized away.
    for k, v in graph_vars.items():
        v.persistable = True

    return pyreader, graph_vars
# Exemplo n.º 10
# 0
def create_model(args, pyreader_name, ernie_config, is_prediction=False):
    """Build an ERNIE + linear-chain-CRF sequence-labeling graph.

    An FC layer maps encoder outputs to per-label emission scores; a CRF
    layer provides the training cost and Viterbi decoding, and chunk_eval
    counters are exposed for F1 computation.

    Returns:
        (pyreader, graph_vars): the reader and a dict of fetch targets.
    """
    pyreader = fluid.layers.py_reader(capacity=50,
                                      shapes=[[-1, args.max_seq_len, 1],
                                              [-1, args.max_seq_len, 1],
                                              [-1, args.max_seq_len, 1],
                                              [-1, args.max_seq_len, 1],
                                              [-1, args.max_seq_len, 1],
                                              [-1, args.max_seq_len, 1], [-1]],
                                      dtypes=[
                                          'int64', 'int64', 'int64', 'int64',
                                          'float32', 'int64', 'int64'
                                      ],
                                      lod_levels=[0, 0, 0, 0, 0, 0, 0],
                                      name=pyreader_name,
                                      use_double_buffer=True)

    (src_ids, sent_ids, pos_ids, task_ids, input_mask, labels,
     seq_lens) = fluid.layers.read_file(pyreader)

    ernie = ErnieModel(src_ids=src_ids,
                       position_ids=pos_ids,
                       sentence_ids=sent_ids,
                       task_ids=task_ids,
                       input_mask=input_mask,
                       config=ernie_config,
                       use_fp16=args.use_fp16)

    enc_out = ernie.get_sequence_output()

    # Per-token emission scores for the CRF, one column per label.
    emission = fluid.layers.fc(
        input=enc_out,
        size=args.num_labels,
        param_attr=fluid.ParamAttr(
            initializer=fluid.initializer.Uniform(low=-0.1, high=0.1),
            regularizer=fluid.regularizer.L2DecayRegularizer(
                regularization_coeff=1e-4)),
        num_flatten_dims=2)

    # CRF negative log-likelihood; 'crfw' holds the transition parameters
    # and may train at its own learning rate.
    crf_cost = fluid.layers.linear_chain_crf(
        input=emission,
        label=labels,
        param_attr=fluid.ParamAttr(name='crfw',
                                   learning_rate=args.crf_learning_rate),
        length=seq_lens)

    loss = fluid.layers.mean(x=crf_cost)

    # Viterbi decoding with the shared 'crfw' transition parameters.
    crf_decode = fluid.layers.crf_decoding(
        input=emission,
        param_attr=fluid.ParamAttr(name='crfw'),
        length=seq_lens)

    # Drop the trailing singleton dim so labels match crf_decode's layout.
    lod_labels = fluid.layers.squeeze(labels, axes=[-1])

    # num_chunk_types: (num_labels - 1) tag labels divided by the number of
    # per-chunk tag prefixes (len(scheme) - 1).
    num_chunk_types = (
        (args.num_labels - 1) // (len(args.chunk_scheme) - 1))  # e.g. IOB scheme

    (_, _, _, num_infer, num_label,
     num_correct) = fluid.layers.chunk_eval(input=crf_decode,
                                            label=lod_labels,
                                            chunk_scheme=args.chunk_scheme,
                                            num_chunk_types=num_chunk_types,
                                            seq_length=seq_lens)
    # The alternative softmax labeling head below is dead code kept as an
    # inert string literal for reference.
    """
    enc_out = fluid.layers.dropout(x=enc_out,
                                   dropout_prob=0.1,
                                   dropout_implementation="upscale_in_train")

    logits = fluid.layers.fc(
        input=enc_out,
        size=args.num_labels,
        num_flatten_dims=2,
        param_attr=fluid.ParamAttr(
            name="cls_seq_label_out_w",
            initializer=fluid.initializer.TruncatedNormal(scale=0.02)),
        bias_attr=fluid.ParamAttr(name="cls_seq_label_out_b",
                                  initializer=fluid.initializer.Constant(0.)))

    infers = fluid.layers.argmax(logits, axis=2)
    ret_infers = fluid.layers.reshape(x=infers, shape=[-1, 1])
    lod_labels = fluid.layers.sequence_unpad(labels, seq_lens)
    lod_infers = fluid.layers.sequence_unpad(infers, seq_lens)

    num_chunk_types = (
        (args.num_labels - 1) // (len(args.chunk_scheme) - 1))  # IOB配置

    (_, _, _, num_infer, num_label,
     num_correct) = fluid.layers.chunk_eval(input=lod_infers,
                                            label=lod_labels,
                                            chunk_scheme=args.chunk_scheme,
                                            num_chunk_types=num_chunk_types)

    labels = fluid.layers.flatten(labels, axis=2)
    ce_loss, probs = fluid.layers.softmax_with_cross_entropy(
        logits=fluid.layers.flatten(logits, axis=2),
        label=labels,
        return_softmax=True)
    input_mask = fluid.layers.flatten(input_mask, axis=2)
    ce_loss = ce_loss * input_mask
    loss = fluid.layers.mean(x=ce_loss)
    """

    graph_vars = {
        "inputs": src_ids,
        "loss": loss,
        "seqlen": seq_lens,
        "crf_decode": crf_decode,
        "num_infer": num_infer,
        "num_label": num_label,
        "num_correct": num_correct,
    }

    # persistable=True keeps fetch targets from being memory-optimized away.
    for k, v in graph_vars.items():
        v.persistable = True

    return pyreader, graph_vars
# Exemplo n.º 11
# 0
    def create_model(self, decoding=False):
        """Build the generation training graph, or delegate to decoding.

        The reader supplies two parallel groups of embedding inputs with
        masks — context inputs and query inputs — followed by the target
        labels and target positions.

        Returns:
            (pyreader, graph_vars) with graph_vars == {"loss": ...}, or the
            result of self.infilling_decode() when decoding.
        """
        if decoding:
            return self.infilling_decode()

        # NOTE(review): dialog tasks use one extra embedding channel
        # (emb_num 4 vs 3) — confirm against self.emb_keys.
        if self.task_type == "dialog":
            emb_num = 4
        else:
            emb_num = 3
        input_shapes = [[-1, self.max_seq_len, 1]] * emb_num + \
                       [[-1, self.max_seq_len, self.max_seq_len]]
        query_input_shapes = [[-1, self.max_seq_len, 1]] * emb_num + \
                             [[-1, self.max_seq_len, self.max_seq_len * 2]]
        input_dtypes = ['int64'] * emb_num + ['float32']
        input_lod_levels = [0] * emb_num + [0]
        # Flat input layout:
        #   [ids x emb_num, mask] + [query ids x emb_num, query mask]
        #   + [tgt_labels, tgt_pos]
        shapes = input_shapes + query_input_shapes + [[-1, 1], [-1, 1]]
        dtypes = input_dtypes * 2 + ['int64', 'int64']
        lod_levels = input_lod_levels * 2 + [0, 0]

        # `to_ternsor` (sic) is the helper's actual name in this class.
        inputs = self.to_ternsor(shapes, dtypes, lod_levels)
        pyreader = fluid.io.DataLoader.from_generator(feed_list=inputs,
                                                      capacity=50,
                                                      iterable=False)

        # Split the flat input list back into the two embedding dicts.
        emb_ids = [{}, {}]
        for key, value in zip(self.emb_keys, inputs[:emb_num]):
            emb_ids[0][key] = value
        for key, value in zip(self.emb_keys,
                              inputs[emb_num + 1:emb_num * 2 + 1]):
            emb_ids[1][key] = value

        input_mask, input_query_mask = inputs[emb_num], inputs[2 * emb_num + 1]
        tgt_labels, tgt_pos = inputs[-2:]

        ernie = ErnieModel(emb_ids=emb_ids,
                           input_mask=[input_mask, input_query_mask],
                           config=self.ernie_config,
                           use_fp16=self.use_fp16,
                           task_type=self.task_type)

        enc_out = ernie.get_sequence_output()
        fc_out = self.cal_logit(enc_out, tgt_pos)

        if self.label_smooth:
            # Smooth one-hot targets; falls back to vocab_size when no
            # separate target vocabulary is configured.
            out_size = self.ernie_config[
                "tgt_vocab_size"] or self.ernie_config['vocab_size']
            labels = fluid.layers.label_smooth(label=fluid.layers.one_hot(
                input=tgt_labels, depth=out_size),
                                               epsilon=self.label_smooth)

            ce_loss = layers.softmax_with_cross_entropy(logits=fc_out,
                                                        label=labels,
                                                        soft_label=True)
            #probs = fluid.layers.log(fluid.layers.softmax(fc_out))
            #ce_loss = fluid.layers.kldiv_loss(probs, labels, reduction='batchmean')
        else:
            ce_loss, probs = fluid.layers.softmax_with_cross_entropy(
                logits=fc_out, label=tgt_labels, return_softmax=True)

        loss = fluid.layers.mean(x=ce_loss)
        graph_vars = {"loss": loss}
        # persistable=True keeps the fetch target from being optimized away.
        for k, v in graph_vars.items():
            v.persistable = True

        return pyreader, graph_vars
# Exemplo n.º 12
# 0
def create_model(args,
                 pyreader_name,
                 ernie_config,
                 is_prediction=False,
                 task_name="",
                 is_classify=False,
                 is_regression=False,
                 ernie_version="1.0"):
    """Build an ERNIE + GAT fine-tuning graph for classification/regression.

    The pooled [CLS] feature is concatenated with a head-word embedding
    obtained by running a GAT over the token-level ERNIE outputs and the
    adjacency matrix supplied by the reader, then fed to a task FC head.

    Returns:
        (pyreader, probs, feed_targets_name) when is_prediction is True,
        otherwise (pyreader, graph_vars).
    """
    # Validate the mode up front. Previously this was asserted only after
    # the reader was built, so a bad flag combination surfaced as an
    # UnboundLocalError on `pyreader` (or a tuple-unpack failure) instead
    # of a clear message.
    assert is_classify != is_regression, 'is_classify or is_regression must be true and only one of them can be true'

    if is_classify:
        # The classification reader additionally carries the adjacency
        # matrix and the head-word indices consumed by the GAT.
        pyreader = fluid.layers.py_reader(capacity=50,
                                          shapes=[[-1, args.max_seq_len, 1], [-1, args.max_seq_len, 1],
                                                  [-1, args.max_seq_len, 1], [-1, args.max_seq_len, 1],
                                                  [-1, args.max_seq_len, 1], [-1, 1], [-1, 1],
                                                  [-1, args.max_seq_len, args.max_seq_len], [-1, 2]],
                                          dtypes=[
                                              'int64', 'int64', 'int64', 'int64', 'float32', 'int64', 'int64', 'int64',
                                              'int64'
                                          ],
                                          lod_levels=[0, 0, 0, 0, 0, 0, 0, 0, 0],
                                          name=task_name + "_" + pyreader_name,
                                          use_double_buffer=True)
    elif is_regression:
        pyreader = fluid.layers.py_reader(capacity=50,
                                          shapes=[[-1, args.max_seq_len, 1], [-1, args.max_seq_len, 1],
                                                  [-1, args.max_seq_len, 1], [-1, args.max_seq_len, 1],
                                                  [-1, args.max_seq_len, 1], [-1, 1], [-1, 1]],
                                          dtypes=['int64', 'int64', 'int64', 'int64', 'float32', 'float32', 'int64'],
                                          lod_levels=[0, 0, 0, 0, 0, 0, 0],
                                          name=task_name + "_" + pyreader_name,
                                          use_double_buffer=True)

    # NOTE(review): the regression reader declares only 7 fields, but the
    # unpack below expects 9 (adj_mat and head_ids included), so the
    # regression path cannot work as written — confirm the intended reader
    # layout before using is_regression=True.
    (src_ids, sent_ids, pos_ids, task_ids, input_mask, labels, qids, adj_mat,
     head_ids) = fluid.layers.read_file(pyreader)

    ernie = ErnieModel(src_ids=src_ids,
                       position_ids=pos_ids,
                       sentence_ids=sent_ids,
                       task_ids=task_ids,
                       input_mask=input_mask,
                       config=ernie_config,
                       use_fp16=args.use_fp16)

    erinie_output = ernie.get_sequence_output()
    cls_feats = ernie.get_pooled_output()

    # Graph attention network over the token-level representations.
    gat = gnn.GAT(input_size=768, hidden_size=100, output_size=50, dropout=0.0, alpha=0.1, heads=12, layer=2)
    # Combine ERNIE token outputs with the adjacency matrix to obtain
    # structure-aware representations.
    gat_emb = gat.forward(erinie_output, adj_mat)
    # Gather the representation of each sample's head word.
    gat_emb = utils.index_sample(gat_emb, head_ids)
    # Concatenate [CLS] with the head-word representation for the task head.
    cls_feats = fluid.layers.concat([cls_feats, gat_emb], axis=1)

    cls_feats = fluid.layers.dropout(x=cls_feats, dropout_prob=0.1, dropout_implementation="upscale_in_train")
    logits = fluid.layers.fc(input=cls_feats,
                             size=args.num_labels,
                             param_attr=fluid.ParamAttr(name=task_name + "_cls_out_w",
                                                        initializer=fluid.initializer.TruncatedNormal(scale=0.02)),
                             bias_attr=fluid.ParamAttr(name=task_name + "_cls_out_b",
                                                       initializer=fluid.initializer.Constant(0.)))

    if is_prediction:
        probs = fluid.layers.softmax(logits)
        feed_targets_name = [src_ids.name, sent_ids.name, pos_ids.name, input_mask.name]
        # ERNIE 2.0 additionally feeds task ids at inference time.
        if ernie_version == "2.0":
            feed_targets_name += [task_ids.name]
        return pyreader, probs, feed_targets_name

    num_seqs = fluid.layers.create_tensor(dtype='int64')
    if is_classify:
        ce_loss, probs = fluid.layers.softmax_with_cross_entropy(logits=logits, label=labels, return_softmax=True)
        loss = fluid.layers.mean(x=ce_loss)
        accuracy = fluid.layers.accuracy(input=probs, label=labels, total=num_seqs)
        graph_vars = {
            "loss": loss,
            "probs": probs,
            "accuracy": accuracy,
            "labels": labels,
            "num_seqs": num_seqs,
            "qids": qids
        }
    else:
        # is_regression is guaranteed by the assertion above.
        cost = fluid.layers.square_error_cost(input=logits, label=labels)
        loss = fluid.layers.mean(x=cost)
        graph_vars = {"loss": loss, "probs": logits, "labels": labels, "num_seqs": num_seqs, "qids": qids}

    return pyreader, graph_vars