def cls_from_ernie(
    args,
    src_ids,
    position_ids,
    sentence_ids,
    task_ids,
    input_mask,
    config,
    use_fp16,
):
    """Run an ERNIE encoder and return its dropout-regularized pooled feature.

    Builds an ErnieModel over the given input tensors, takes the pooled
    ([CLS]) output and applies 0.1 dropout with the ``upscale_in_train``
    implementation (so inference needs no rescaling).
    """
    encoder = ErnieModel(
        src_ids=src_ids,
        position_ids=position_ids,
        sentence_ids=sentence_ids,
        task_ids=task_ids,
        input_mask=input_mask,
        config=config,
        use_fp16=use_fp16,
    )
    pooled = encoder.get_pooled_output()
    return fluid.layers.dropout(
        x=pooled,
        dropout_prob=0.1,
        dropout_implementation="upscale_in_train",
    )
Exemplo n.º 2
0
def create_model(args, pyreader_name, ernie_config):
    """Build an ERNIE embedding-extraction graph (no task head).

    Args:
        args: namespace providing ``max_seq_len``.
        pyreader_name: unique name for the py_reader op.
        ernie_config: ErnieModel configuration.

    Returns:
        (pyreader, graph_vars) exposing the pooled [CLS] embeddings and
        the unpadded top-layer token embeddings.
    """
    pyreader = fluid.layers.py_reader(
        capacity=50,
        shapes=[[-1, args.max_seq_len, 1], [-1, args.max_seq_len, 1],
                [-1, args.max_seq_len, 1], [-1, args.max_seq_len, 1], [-1, 1]],
        # FIX: the mask dtype was the bare 'float' alias (maps to float64);
        # 'float32' matches every other reader in this file and the mask
        # dtype ErnieModel is fed elsewhere.
        dtypes=['int64', 'int64', 'int64', 'float32', 'int64'],
        lod_levels=[0, 0, 0, 0, 0],
        name=pyreader_name,
        use_double_buffer=True)

    (src_ids, sent_ids, pos_ids, input_mask,
     seq_lens) = fluid.layers.read_file(pyreader)

    ernie = ErnieModel(src_ids=src_ids,
                       position_ids=pos_ids,
                       sentence_ids=sent_ids,
                       input_mask=input_mask,
                       config=ernie_config)

    enc_out = ernie.get_sequence_output()
    # Strip padding so downstream consumers see true-length sequences.
    unpad_enc_out = fluid.layers.sequence_unpad(enc_out, length=seq_lens)
    cls_feats = ernie.get_pooled_output()

    # set persistable = True to avoid memory optimization reusing these.
    enc_out.persistable = True
    unpad_enc_out.persistable = True
    cls_feats.persistable = True

    graph_vars = {
        "cls_embeddings": cls_feats,
        "top_layer_embeddings": unpad_enc_out,
    }

    return pyreader, graph_vars
def create_model(args, ernie_config):
    """Declare feed placeholders, run ERNIE, and expose named latent outputs.

    Returns (inputs, [sequence_output, classification_output]); the two
    outputs are passed through identity ``scale`` layers purely to give
    them stable, readable names for inference-model export.
    """
    max_len = args.max_seq_len
    specs = [
        ("src_ids", [-1, max_len, 1], 'int64'),
        ("sent_ids", [-1, max_len, 1], 'int64'),
        ("pos_ids", [-1, max_len, 1], 'int64'),
        ("task_ids", [-1, max_len, 1], 'int64'),
        ("input_mask", [-1, max_len, 1], 'float32'),
    ]
    inputs = [fluid.data(n, s, dtype=d) for n, s, d in specs]
    src_ids, sent_ids, pos_ids, task_ids, input_mask = inputs

    ernie = ErnieModel(
        src_ids=src_ids,
        position_ids=pos_ids,
        sentence_ids=sent_ids,
        task_ids=task_ids,
        input_mask=input_mask,
        config=ernie_config,
        use_fp16=args.use_fp16)

    seq_out = ernie.get_sequence_output()
    cls_feats = ernie.get_pooled_output()
    # Identity scales exist only to rename the latent tensors; otherwise
    # save_inference_model emits opaque names like 'save_infer_model/scale_1'.
    seq_out = fluid.layers.scale(seq_out, scale=1.0, name='ernie_sequence_latent')
    cls_feats = fluid.layers.scale(cls_feats, scale=1.0, name='ernie_classification')

    for i, inp in enumerate(inputs):
        print(f'input[{i}]:', inp.name, inp.shape, inp.dtype)
    print('sequence_output  :', seq_out.name, seq_out.shape, seq_out.dtype)
    print('classifier_output:', cls_feats.name, cls_feats.shape, cls_feats.dtype)
    return inputs, [seq_out, cls_feats]
Exemplo n.º 4
0
def create_model(pyreader_name, ernie_config):
    """Build the ERNIE pre-training graph (masked LM + next-sentence).

    NOTE(review): unlike the sibling builders, `args` is read from the
    enclosing/global scope here (max_seq_len, weight_sharing, use_fp16,
    next_sen_coef, loss_scaling) — confirm it is defined at module level
    before calling.

    Returns:
        (pyreader, next_sent_acc, mask_lm_loss, total_loss)
    """
    pyreader = fluid.layers.py_reader(
        capacity=70,
        shapes=[[-1, args.max_seq_len, 1], [-1, args.max_seq_len, 1],
                [-1, args.max_seq_len, 1],
                [-1, args.max_seq_len, args.max_seq_len], [-1, 1], [-1, 1],
                [-1, 1]],
        dtypes=[
            'int64', 'int64', 'int64', 'float32', 'int64', 'int64', 'int64'
        ],
        lod_levels=[0, 0, 0, 0, 0, 0, 0],
        name=pyreader_name,
        use_double_buffer=True)

    # Note the feed order is (src, pos, sent) here, not (src, sent, pos)
    # as in the other builders in this file.
    (src_ids, pos_ids, sent_ids, input_mask, mask_label, mask_pos,
     labels) = fluid.layers.read_file(pyreader)

    ernie = ErnieModel(src_ids=src_ids,
                       position_ids=pos_ids,
                       sentence_ids=sent_ids,
                       input_mask=input_mask,
                       config=ernie_config,
                       weight_sharing=args.weight_sharing,
                       use_fp16=args.use_fp16)

    next_sent_acc, mask_lm_loss, total_loss = ernie.get_pretraining_output(
        mask_label, mask_pos, labels, args.next_sen_coef)

    # Static fp16 loss scaling: scale the loss up to avoid gradient
    # underflow (unscaling is expected to happen in the optimizer).
    if args.use_fp16 and args.loss_scaling > 1.0:
        total_loss *= args.loss_scaling

    return pyreader, next_sent_acc, mask_lm_loss, total_loss
Exemplo n.º 5
0
def create_model(args, pyreader_name, ernie_config, is_prediction=False):
    """Build an ERNIE token-classification graph with an explicit attention mask.

    Unlike the other builders in this file, the reader feeds a full
    [batch, seq, seq] ``self_attn_mask`` instead of a per-token input mask.

    Args:
        args: namespace with max_seq_len, num_labels, use_fp16, loss_scaling.
        pyreader_name: unique name for the py_reader op.
        ernie_config: ErnieModel configuration.
        is_prediction: unused here; kept for signature parity with the
            sibling builders.

    Returns:
        (pyreader, graph_vars) with loss, probs, flattened labels/infers
        and seq_lens, all marked persistable.
    """
    pyreader = fluid.layers.py_reader(
        capacity=50,
        shapes=[[-1, args.max_seq_len, 1], [-1, args.max_seq_len, 1],
                [-1, args.max_seq_len, 1],
                [-1, args.max_seq_len, args.max_seq_len],
                [-1, args.max_seq_len, 1], [-1, 1]],
        # FIX: the mask dtype was the bare 'float' alias (maps to float64);
        # 'float32' matches every other mask in this file.
        dtypes=['int64', 'int64', 'int64', 'float32', 'int64', 'int64'],
        lod_levels=[0, 0, 0, 0, 0, 0],
        name=pyreader_name,
        use_double_buffer=True)

    (src_ids, sent_ids, pos_ids, self_attn_mask, labels,
     seq_lens) = fluid.layers.read_file(pyreader)

    ernie = ErnieModel(src_ids=src_ids,
                       position_ids=pos_ids,
                       sentence_ids=sent_ids,
                       self_attn_mask=self_attn_mask,
                       config=ernie_config,
                       use_fp16=args.use_fp16)

    enc_out = ernie.get_sequence_output()
    # Per-token label logits: [batch, seq, num_labels].
    logits = fluid.layers.fc(
        input=enc_out,
        size=args.num_labels,
        num_flatten_dims=2,
        param_attr=fluid.ParamAttr(
            name="cls_seq_label_out_w",
            initializer=fluid.initializer.TruncatedNormal(scale=0.02)),
        bias_attr=fluid.ParamAttr(name="cls_seq_label_out_b",
                                  initializer=fluid.initializer.Constant(0.)))

    ret_labels = fluid.layers.reshape(x=labels, shape=[-1, 1])
    ret_infers = fluid.layers.reshape(x=fluid.layers.argmax(logits, axis=2),
                                      shape=[-1, 1])

    # Flatten to [batch*seq, ...] for the token-level softmax cross-entropy.
    labels = fluid.layers.flatten(labels, axis=2)
    ce_loss, probs = fluid.layers.softmax_with_cross_entropy(
        logits=fluid.layers.flatten(logits, axis=2),
        label=labels,
        return_softmax=True)
    loss = fluid.layers.mean(x=ce_loss)

    # Static fp16 loss scaling (unscaled later in the optimizer).
    if args.use_fp16 and args.loss_scaling > 1.0:
        loss *= args.loss_scaling

    graph_vars = {
        "loss": loss,
        "probs": probs,
        "labels": ret_labels,
        "infers": ret_infers,
        "seq_lens": seq_lens
    }

    # persistable=True keeps these tensors out of memory-optimization reuse.
    for k, v in graph_vars.items():
        v.persistable = True

    return pyreader, graph_vars
Exemplo n.º 6
0
    def forward(self, features):
        """Classification forward pass: (src_ids, sent_ids) -> logits/probs.

        Derives position ids, the padding mask and constant task ids on
        the fly, runs ErnieModel, and applies dropout + an FC classifier
        head. Returns softmax probabilities in PREDICT mode, raw logits
        otherwise.
        """
        src_ids, sent_ids = features
        dtype = 'float16' if self.hparam['fp16'] else 'float32'
        zero = L.fill_constant([1], dtype='int64', value=0)
        # Mask is 1.0 on real tokens and 0.0 on padding; assumes pad id == 0.
        input_mask = L.cast(L.logical_not(L.equal(src_ids, zero)), dtype) # assume pad id == 0
        #input_mask = L.unsqueeze(input_mask, axes=[2])
        d_shape = L.shape(src_ids)
        seqlen = d_shape[1]
        batch_size = d_shape[0]
        # Position ids 0..seqlen-1, tiled over the batch, shaped [b, s, 1].
        pos_ids = L.unsqueeze(L.range(0, seqlen, 1, dtype='int32'), axes=[0])
        pos_ids = L.expand(pos_ids, [batch_size, 1])
        pos_ids = L.unsqueeze(pos_ids, axes=[2])
        pos_ids = L.cast(pos_ids, 'int64')
        pos_ids.stop_gradient = True
        input_mask.stop_gradient = True
        task_ids = L.zeros_like(src_ids) + self.hparam.task_id #constant task-id placeholder; currently unused by the model
        task_ids.stop_gradient = True

        bert = ErnieModel(
            src_ids=src_ids,
            position_ids=pos_ids,
            sentence_ids=sent_ids,
            task_ids=task_ids,
            input_mask=input_mask,
            config=self.hparam,
            use_fp16=self.hparam['fp16']
        )

        cls_feats = bert.get_pooled_output()

        cls_feats = L.dropout(
            x=cls_feats,
            dropout_prob=0.1,
            dropout_implementation="upscale_in_train"
        )

        logits = L.fc(
            input=cls_feats,
            size=self.hparam['num_label'],
            param_attr=F.ParamAttr(
                name="cls_out_w",
                initializer=F.initializer.TruncatedNormal(scale=0.02)),
            bias_attr=F.ParamAttr(
                name="cls_out_b", initializer=F.initializer.Constant(0.))
        )

        propeller.summary.histogram('pred', logits)

        # Only prediction mode applies softmax; training/eval losses are
        # presumably computed from raw logits elsewhere — confirm in caller.
        if self.mode is propeller.RunMode.PREDICT:
            probs = L.softmax(logits)
            return probs
        else:
            return logits
Exemplo n.º 7
0
    def _model(is_noise=False):
        """Build the ERNIE classification graph for this task (closure).

        NOTE(review): reads src_ids, pos_ids, sent_ids, task_ids,
        input_mask, labels, qids, ernie_config, task_name and args from
        the enclosing scope (not visible in this chunk) — verify there.
        When is_noise is True the model is built in noise mode and the
        pooled-feature dropout is skipped (presumably an adversarial /
        perturbation pass — confirm against ErnieModel's is_noise flag).
        """
        ernie = ErnieModel(src_ids=src_ids,
                           position_ids=pos_ids,
                           sentence_ids=sent_ids,
                           task_ids=task_ids,
                           input_mask=input_mask,
                           config=ernie_config,
                           is_noise=is_noise)

        cls_feats = ernie.get_pooled_output()
        if not is_noise:
            cls_feats = fluid.layers.dropout(
                x=cls_feats,
                dropout_prob=0.1,
                dropout_implementation="upscale_in_train")
        # Task-specific classifier head; parameter names are prefixed so
        # multiple tasks can coexist in one program.
        logits = fluid.layers.fc(
            input=cls_feats,
            size=args.num_labels,
            param_attr=fluid.ParamAttr(
                name=task_name + "_cls_out_w",
                initializer=fluid.initializer.TruncatedNormal(scale=0.02)),
            bias_attr=fluid.ParamAttr(
                name=task_name + "_cls_out_b",
                initializer=fluid.initializer.Constant(0.)))
        """
        if is_prediction:
            probs = fluid.layers.softmax(logits)
            feed_targets_name = [
                src_ids.name, sent_ids.name, pos_ids.name, input_mask.name
            ]
            if ernie_version == "2.0":
                feed_targets_name += [task_ids.name]
            return pyreader, probs, feed_targets_name
        """

        num_seqs = fluid.layers.create_tensor(dtype='int64')
        ## add focal loss
        ce_loss, probs = fluid.layers.softmax_with_cross_entropy(
            logits=logits, label=labels, return_softmax=True)
        loss = fluid.layers.mean(x=ce_loss)
        # accuracy also fills num_seqs with the batch size as a side effect.
        accuracy = fluid.layers.accuracy(input=probs,
                                         label=labels,
                                         total=num_seqs)
        graph_vars = {
            "loss": loss,
            "probs": probs,
            "accuracy": accuracy,
            "labels": labels,
            "num_seqs": num_seqs,
            "qids": qids
        }
        return graph_vars
Exemplo n.º 8
0
    def forward(self, features):
        """Sequence-labeling forward pass; returns (logits, input_seqlen).

        NOTE(review): input_mask here is 1.0 where src_ids == pad (0) and
        0.0 on real tokens — the opposite of the sibling classification
        forward, which applies logical_not. This looks inverted; confirm
        against ErnieModel's expected mask convention before relying on it.
        """
        src_ids, sent_ids, input_seqlen = features
        zero = L.fill_constant([1], dtype='int64', value=0)
        input_mask = L.cast(L.equal(src_ids, zero),
                            'float32')  # assume pad id == 0
        #input_mask = L.unsqueeze(input_mask, axes=[2])
        d_shape = L.shape(src_ids)
        seqlen = d_shape[1]
        batch_size = d_shape[0]
        # Position ids 0..seqlen-1, tiled over the batch, shaped [b, s, 1].
        pos_ids = L.unsqueeze(L.range(0, seqlen, 1, dtype='int32'), axes=[0])
        pos_ids = L.expand(pos_ids, [batch_size, 1])
        pos_ids = L.unsqueeze(pos_ids, axes=[2])
        pos_ids = L.cast(pos_ids, 'int64')
        pos_ids.stop_gradient = True
        input_mask.stop_gradient = True
        task_ids = L.zeros_like(
            src_ids) + self.hparam.task_id  #constant task-id placeholder; currently unused by the model
        task_ids.stop_gradient = True

        model = ErnieModel(src_ids=src_ids,
                           position_ids=pos_ids,
                           sentence_ids=sent_ids,
                           task_ids=task_ids,
                           input_mask=input_mask,
                           config=self.hparam,
                           use_fp16=self.hparam['use_fp16'])

        enc_out = model.get_sequence_output()
        # Per-token label logits: [batch, seq, num_label].
        logits = L.fc(
            input=enc_out,
            size=self.num_label,
            num_flatten_dims=2,
            param_attr=F.ParamAttr(
                name="cls_seq_label_out_w",
                initializer=F.initializer.TruncatedNormal(scale=0.02)),
            bias_attr=F.ParamAttr(name="cls_seq_label_out_b",
                                  initializer=F.initializer.Constant(0.)))

        propeller.summary.histogram('pred', logits)

        return logits, input_seqlen
Exemplo n.º 9
0
def create_model_predict(args, ernie_config, is_prediction=False):
    """Build an ERNIE classification graph for prediction/export.

    Args:
        args: namespace with num_labels and use_fp16.
        ernie_config: ErnieModel configuration.
        is_prediction: attach a softmax head and return (probs, graph_vars)
            when True; otherwise return graph_vars with the raw logits.

    Returns:
        (probs, graph_vars) when is_prediction, else graph_vars.
    """
    (src_ids, sent_ids, pos_ids, input_mask, task_ids) = make_all_inputs(args)

    ernie = ErnieModel(src_ids=src_ids,
                       position_ids=pos_ids,
                       sentence_ids=sent_ids,
                       task_ids=task_ids,
                       input_mask=input_mask,
                       config=ernie_config,
                       use_fp16=args.use_fp16)

    cls_feats = ernie.get_pooled_output()
    cls_feats = fluid.layers.dropout(x=cls_feats,
                                     dropout_prob=0.1,
                                     dropout_implementation="upscale_in_train")
    logits = fluid.layers.fc(
        input=cls_feats,
        size=args.num_labels,
        param_attr=fluid.ParamAttr(
            name="_cls_out_w",
            initializer=fluid.initializer.TruncatedNormal(scale=0.02)),
        bias_attr=fluid.ParamAttr(name="_cls_out_b",
                                  initializer=fluid.initializer.Constant(0.)))

    # FIX: graph_vars was previously only bound inside the is_prediction
    # branch, so the fall-through `return graph_vars` raised NameError.
    # Always expose the raw logits for the non-prediction path.
    graph_vars = {"logits": logits}
    logits.persistable = True

    if is_prediction:
        probs = fluid.layers.softmax(logits)
        # NOTE(review): feed_targets_name is computed but never returned;
        # kept for parity with the sibling builders — confirm it is unused.
        feed_targets_name = [
            src_ids.name, pos_ids.name, sent_ids.name, input_mask.name
        ]
        graph_vars = {
            "probs": probs,
        }

        for k, v in graph_vars.items():
            v.persistable = True

        return probs, graph_vars

    return graph_vars
Exemplo n.º 10
0
def create_model(args, pyreader_name, ernie_config, is_prediction=False):
    """Build an ERNIE sequence-labeling graph with a linear-chain CRF head.

    The py_reader feeds (src_ids, sent_ids, pos_ids, task_ids, input_mask,
    labels, seq_lens); the ERNIE sequence output is projected to per-token
    emissions, trained with linear_chain_crf and evaluated via
    crf_decoding + chunk_eval.

    Returns:
        (pyreader, graph_vars) where graph_vars holds the loss, the CRF
        decode and the chunk-eval counters, all marked persistable.
    """
    pyreader = fluid.layers.py_reader(capacity=50,
                                      shapes=[[-1, args.max_seq_len, 1],
                                              [-1, args.max_seq_len, 1],
                                              [-1, args.max_seq_len, 1],
                                              [-1, args.max_seq_len, 1],
                                              [-1, args.max_seq_len, 1],
                                              [-1, args.max_seq_len, 1], [-1]],
                                      dtypes=[
                                          'int64', 'int64', 'int64', 'int64',
                                          'float32', 'int64', 'int64'
                                      ],
                                      lod_levels=[0, 0, 0, 0, 0, 0, 0],
                                      name=pyreader_name,
                                      use_double_buffer=True)

    (src_ids, sent_ids, pos_ids, task_ids, input_mask, labels,
     seq_lens) = fluid.layers.read_file(pyreader)

    ernie = ErnieModel(src_ids=src_ids,
                       position_ids=pos_ids,
                       sentence_ids=sent_ids,
                       task_ids=task_ids,
                       input_mask=input_mask,
                       config=ernie_config,
                       use_fp16=args.use_fp16)

    enc_out = ernie.get_sequence_output()

    # Per-token emission scores for the CRF (one score per label).
    emission = fluid.layers.fc(
        input=enc_out,
        size=args.num_labels,
        param_attr=fluid.ParamAttr(
            initializer=fluid.initializer.Uniform(low=-0.1, high=0.1),
            regularizer=fluid.regularizer.L2DecayRegularizer(
                regularization_coeff=1e-4)),
        num_flatten_dims=2)

    crf_cost = fluid.layers.linear_chain_crf(
        input=emission,
        label=labels,
        param_attr=fluid.ParamAttr(name='crfw',
                                   learning_rate=args.crf_learning_rate),
        length=seq_lens)

    loss = fluid.layers.mean(x=crf_cost)

    # Viterbi decoding shares the transition parameters ('crfw') learned above.
    crf_decode = fluid.layers.crf_decoding(
        input=emission,
        param_attr=fluid.ParamAttr(name='crfw'),
        length=seq_lens)

    lod_labels = fluid.layers.squeeze(labels, axes=[-1])

    # Number of chunk types implied by the label count and tagging scheme.
    num_chunk_types = (
        (args.num_labels - 1) // (len(args.chunk_scheme) - 1))  # IOB-style scheme

    (_, _, _, num_infer, num_label,
     num_correct) = fluid.layers.chunk_eval(input=crf_decode,
                                            label=lod_labels,
                                            chunk_scheme=args.chunk_scheme,
                                            num_chunk_types=num_chunk_types,
                                            seq_length=seq_lens)
    """
    enc_out = fluid.layers.dropout(x=enc_out,
                                   dropout_prob=0.1,
                                   dropout_implementation="upscale_in_train")

    logits = fluid.layers.fc(
        input=enc_out,
        size=args.num_labels,
        num_flatten_dims=2,
        param_attr=fluid.ParamAttr(
            name="cls_seq_label_out_w",
            initializer=fluid.initializer.TruncatedNormal(scale=0.02)),
        bias_attr=fluid.ParamAttr(name="cls_seq_label_out_b",
                                  initializer=fluid.initializer.Constant(0.)))

    infers = fluid.layers.argmax(logits, axis=2)
    ret_infers = fluid.layers.reshape(x=infers, shape=[-1, 1])
    lod_labels = fluid.layers.sequence_unpad(labels, seq_lens)
    lod_infers = fluid.layers.sequence_unpad(infers, seq_lens)

    num_chunk_types = (
        (args.num_labels - 1) // (len(args.chunk_scheme) - 1))  # IOB配置

    (_, _, _, num_infer, num_label,
     num_correct) = fluid.layers.chunk_eval(input=lod_infers,
                                            label=lod_labels,
                                            chunk_scheme=args.chunk_scheme,
                                            num_chunk_types=num_chunk_types)

    labels = fluid.layers.flatten(labels, axis=2)
    ce_loss, probs = fluid.layers.softmax_with_cross_entropy(
        logits=fluid.layers.flatten(logits, axis=2),
        label=labels,
        return_softmax=True)
    input_mask = fluid.layers.flatten(input_mask, axis=2)
    ce_loss = ce_loss * input_mask
    loss = fluid.layers.mean(x=ce_loss)
    """

    graph_vars = {
        "inputs": src_ids,
        "loss": loss,
        "seqlen": seq_lens,
        "crf_decode": crf_decode,
        "num_infer": num_infer,
        "num_label": num_label,
        "num_correct": num_correct,
    }

    # persistable=True keeps these tensors out of memory-optimization reuse.
    for k, v in graph_vars.items():
        v.persistable = True

    return pyreader, graph_vars
Exemplo n.º 11
0
def create_model(args, pyreader_name, ernie_config, is_prediction=False):
    """Build an ERNIE (1.0-style, no task_ids) classification graph.

    Returns:
        is_prediction: (pyreader, probs, feed_targets_name) for
            inference-model export.
        otherwise: (pyreader, graph_vars) with loss, probs, accuracy and
            AUC statistics, all marked persistable.
    """
    pyreader = fluid.layers.py_reader(
        capacity=50,
        shapes=[[-1, args.max_seq_len, 1], [-1, args.max_seq_len, 1],
                [-1, args.max_seq_len, 1], [-1, args.max_seq_len, 1], [-1, 1],
                [-1, 1]],
        dtypes=['int64', 'int64', 'int64', 'float32', 'int64', 'int64'],
        lod_levels=[0, 0, 0, 0, 0, 0],
        name=pyreader_name,
        use_double_buffer=True)

    (src_ids, sent_ids, pos_ids, input_mask, labels,
     qids) = fluid.layers.read_file(pyreader)

    ernie = ErnieModel(
        src_ids=src_ids,
        position_ids=pos_ids,
        sentence_ids=sent_ids,
        input_mask=input_mask,
        config=ernie_config,
        use_fp16=args.use_fp16)

    cls_feats = ernie.get_pooled_output()
    cls_feats = fluid.layers.dropout(
        x=cls_feats,
        dropout_prob=0.1,
        dropout_implementation="upscale_in_train")
    logits = fluid.layers.fc(
        input=cls_feats,
        size=args.num_labels,
        param_attr=fluid.ParamAttr(
            name="cls_out_w",
            initializer=fluid.initializer.TruncatedNormal(scale=0.02)),
        bias_attr=fluid.ParamAttr(
            name="cls_out_b", initializer=fluid.initializer.Constant(0.)))

    # Export path: no loss/metrics, just softmax probs and the feed names.
    if is_prediction:
        probs = fluid.layers.softmax(logits)
        feed_targets_name = [
            src_ids.name, sent_ids.name, pos_ids.name, input_mask.name
        ]
        return pyreader, probs, feed_targets_name

    ce_loss, probs = fluid.layers.softmax_with_cross_entropy(
        logits=logits, label=labels, return_softmax=True)
    loss = fluid.layers.mean(x=ce_loss)

    # Static fp16 loss scaling (unscaled later in the optimizer).
    if args.use_fp16 and args.loss_scaling > 1.0:
        loss *= args.loss_scaling

    num_seqs = fluid.layers.create_tensor(dtype='int64')
    # accuracy also fills num_seqs with the batch size as a side effect.
    accuracy = fluid.layers.accuracy(input=probs, label=labels, total=num_seqs)
    # Global + batch AUC and their positive/negative histogram state tensors.
    # NOTE(review): AUC assumes a binary task — confirm args.num_labels == 2.
    auc, batch_auc, [batch_stat_pos, batch_stat_neg, stat_pos, stat_neg] = fluid.layers.auc(input=probs, label=labels)

    graph_vars = {
        "loss": loss,
        "probs": probs,
        "accuracy": accuracy,
        "labels": labels,
        "num_seqs": num_seqs,
        "qids": qids,
        "auc": auc,
        "batch_auc": batch_auc,
        "batch_stat_pos": batch_stat_pos,
        "batch_stat_neg": batch_stat_neg,
        "stat_pos": stat_pos,
        "stat_neg": stat_neg
    }

    # persistable=True keeps these tensors out of memory-optimization reuse.
    for k, v in graph_vars.items():
        v.persistable = True

    return pyreader, graph_vars
Exemplo n.º 12
0
def create_model(args,
                 pyreader_name,
                 ernie_config,
                 is_prediction=False,
                 task_name="",
                 is_classify=False,
                 is_regression=False,
                 ernie_version="1.0"):
    """Create the ERNIE fine-tuning graph and its input py_reader.

    Exactly one of is_classify / is_regression must be True; the flag
    selects the reader's label dtype (int64 vs float32) and the loss head.

    Returns:
        is_prediction: (pyreader, probs, feed_targets_name) for export.
        otherwise: (pyreader, graph_vars).
    """
    if is_classify:
        pyreader = fluid.layers.py_reader(capacity=50,
                                          shapes=[[-1, args.max_seq_len, 1],
                                                  [-1, args.max_seq_len, 1],
                                                  [-1, args.max_seq_len, 1],
                                                  [-1, args.max_seq_len, 1],
                                                  [-1, args.max_seq_len, 1],
                                                  [-1, 1], [-1, 1]],
                                          dtypes=[
                                              'int64', 'int64', 'int64',
                                              'int64', 'float32', 'int64',
                                              'int64'
                                          ],
                                          lod_levels=[0, 0, 0, 0, 0, 0, 0],
                                          name=task_name + "_" + pyreader_name,
                                          use_double_buffer=True)
    elif is_regression:
        pyreader = fluid.layers.py_reader(capacity=50,
                                          shapes=[[-1, args.max_seq_len, 1],
                                                  [-1, args.max_seq_len, 1],
                                                  [-1, args.max_seq_len, 1],
                                                  [-1, args.max_seq_len, 1],
                                                  [-1, args.max_seq_len, 1],
                                                  [-1, 1], [-1, 1]],
                                          dtypes=[
                                              'int64', 'int64', 'int64',
                                              'int64', 'float32', 'float32',
                                              'int64'
                                          ],
                                          lod_levels=[0, 0, 0, 0, 0, 0, 0],
                                          name=task_name + "_" + pyreader_name,
                                          use_double_buffer=True)
    else:
        # FIX: previously fell through with `pyreader` undefined and raised
        # a confusing NameError below; fail fast with a clear message.
        raise ValueError(
            'is_classify or is_regression must be true and only one of them can be true'
        )

    (src_ids, sent_ids, pos_ids, task_ids, input_mask, labels,
     qids) = fluid.layers.read_file(pyreader)

    ernie = ErnieModel(src_ids=src_ids,
                       position_ids=pos_ids,
                       sentence_ids=sent_ids,
                       task_ids=task_ids,
                       input_mask=input_mask,
                       config=ernie_config,
                       use_fp16=args.use_fp16)

    cls_feats = ernie.get_pooled_output()
    cls_feats = fluid.layers.dropout(x=cls_feats,
                                     dropout_prob=0.1,
                                     dropout_implementation="upscale_in_train")
    # Task-specific head; parameter names are prefixed so multiple tasks
    # can coexist in one program.
    logits = fluid.layers.fc(
        input=cls_feats,
        size=args.num_labels,
        param_attr=fluid.ParamAttr(
            name=task_name + "_cls_out_w",
            initializer=fluid.initializer.TruncatedNormal(scale=0.02)),
        bias_attr=fluid.ParamAttr(name=task_name + "_cls_out_b",
                                  initializer=fluid.initializer.Constant(0.)))

    # Export path: no loss/metrics, just softmax probs and the feed names.
    if is_prediction:
        probs = fluid.layers.softmax(logits)
        feed_targets_name = [
            src_ids.name, sent_ids.name, pos_ids.name, input_mask.name
        ]
        if ernie_version == "2.0":
            feed_targets_name += [task_ids.name]
        return pyreader, probs, feed_targets_name

    assert is_classify != is_regression, 'is_classify or is_regression must be true and only one of them can be true'
    num_seqs = fluid.layers.create_tensor(dtype='int64')
    if is_classify:
        ce_loss, probs = fluid.layers.softmax_with_cross_entropy(
            logits=logits, label=labels, return_softmax=True)
        loss = fluid.layers.mean(x=ce_loss)
        # accuracy also fills num_seqs with the batch size as a side effect.
        accuracy = fluid.layers.accuracy(input=probs,
                                         label=labels,
                                         total=num_seqs)
        graph_vars = {
            "loss": loss,
            "probs": probs,
            "accuracy": accuracy,
            "labels": labels,
            "num_seqs": num_seqs,
            "qids": qids
        }
    elif is_regression:
        cost = fluid.layers.square_error_cost(input=logits, label=labels)
        loss = fluid.layers.mean(x=cost)
        graph_vars = {
            "loss": loss,
            "probs": logits,
            "labels": labels,
            "num_seqs": num_seqs,
            "qids": qids
        }
    else:
        raise ValueError(
            'unsupported fine tune mode. only supported classify/regression')

    return pyreader, graph_vars
Exemplo n.º 13
0
def create_model(args, pyreader_name, ernie_config, is_training):
    """Build an ERNIE extractive-QA (MRC) graph predicting answer spans.

    A 2-unit FC over the sequence output yields start/end logits; the loss
    is the mean of the start- and end-position cross-entropies.

    NOTE(review): `is_training` is accepted but never used in this body —
    dropout is applied unconditionally. Confirm whether that is intended.

    Returns:
        (pyreader, graph_vars) with loss, num_seqs, unique_id and the
        start/end logits, all marked persistable.
    """
    pyreader = fluid.layers.py_reader(capacity=50,
                                      shapes=[[-1, args.max_seq_len, 1],
                                              [-1, args.max_seq_len, 1],
                                              [-1, args.max_seq_len, 1],
                                              [-1, args.max_seq_len, 1],
                                              [-1, args.max_seq_len, 1],
                                              [-1, 1], [-1, 1], [-1, 1]],
                                      dtypes=[
                                          'int64', 'int64', 'int64', 'int64',
                                          'float32', 'int64', 'int64', 'int64'
                                      ],
                                      lod_levels=[0, 0, 0, 0, 0, 0, 0, 0],
                                      name=pyreader_name,
                                      use_double_buffer=True)
    (src_ids, sent_ids, pos_ids, task_ids, input_mask, start_positions,
     end_positions, unique_id) = fluid.layers.read_file(pyreader)

    ernie = ErnieModel(src_ids=src_ids,
                       position_ids=pos_ids,
                       sentence_ids=sent_ids,
                       task_ids=task_ids,
                       input_mask=input_mask,
                       config=ernie_config,
                       use_fp16=args.use_fp16)

    enc_out = ernie.get_sequence_output()
    enc_out = fluid.layers.dropout(x=enc_out,
                                   dropout_prob=0.1,
                                   dropout_implementation="upscale_in_train")

    # Two scores per token: start-of-span and end-of-span logits.
    logits = fluid.layers.fc(
        input=enc_out,
        size=2,
        num_flatten_dims=2,
        param_attr=fluid.ParamAttr(
            name="cls_mrc_out_w",
            initializer=fluid.initializer.TruncatedNormal(scale=0.02)),
        bias_attr=fluid.ParamAttr(name="cls_mrc_out_b",
                                  initializer=fluid.initializer.Constant(0.)))

    # [batch, seq, 2] -> [2, batch, seq], then split into the two heads.
    logits = fluid.layers.transpose(x=logits, perm=[2, 0, 1])
    start_logits, end_logits = fluid.layers.unstack(x=logits, axis=0)

    # Count the examples in the batch by summing a per-example ones tensor.
    batch_ones = fluid.layers.fill_constant_batch_size_like(input=start_logits,
                                                            dtype='int64',
                                                            shape=[1],
                                                            value=1)
    num_seqs = fluid.layers.reduce_sum(input=batch_ones)

    def compute_loss(logits, positions):
        # Mean softmax cross-entropy against the gold span position.
        loss = fluid.layers.softmax_with_cross_entropy(logits=logits,
                                                       label=positions)
        loss = fluid.layers.mean(x=loss)
        return loss

    start_loss = compute_loss(start_logits, start_positions)
    end_loss = compute_loss(end_logits, end_positions)
    loss = (start_loss + end_loss) / 2.0
    # Static fp16 loss scaling (unscaled later in the optimizer).
    if args.use_fp16 and args.loss_scaling > 1.0:
        loss *= args.loss_scaling

    graph_vars = {
        "loss": loss,
        "num_seqs": num_seqs,
        "unique_id": unique_id,
        "start_logits": start_logits,
        "end_logits": end_logits
    }

    # persistable=True keeps these tensors out of memory-optimization reuse.
    for k, v in graph_vars.items():
        v.persistable = True

    return pyreader, graph_vars
Exemplo n.º 14
0
def create_model(args, pyreader_name, ernie_config, is_prediction=False):
    """Build an ERNIE sequence-labeling graph with a softmax CE head.

    Per-token logits are trained with softmax cross-entropy (padding
    positions zeroed by input_mask) and evaluated via chunk_eval on the
    unpadded predictions.

    Returns:
        (pyreader, graph_vars) with loss, probs, flattened labels/infers,
        chunk-eval counters and seq_lens, all marked persistable.
    """
    pyreader = fluid.layers.py_reader(capacity=50,
                                      shapes=[[-1, args.max_seq_len, 1],
                                              [-1, args.max_seq_len, 1],
                                              [-1, args.max_seq_len, 1],
                                              [-1, args.max_seq_len, 1],
                                              [-1, args.max_seq_len, 1],
                                              [-1, args.max_seq_len, 1],
                                              [-1, 1]],
                                      dtypes=[
                                          'int64', 'int64', 'int64', 'int64',
                                          'float32', 'int64', 'int64'
                                      ],
                                      lod_levels=[0, 0, 0, 0, 0, 0, 0],
                                      name=pyreader_name,
                                      use_double_buffer=True)

    (src_ids, sent_ids, pos_ids, task_ids, input_mask, labels,
     seq_lens) = fluid.layers.read_file(pyreader)

    ernie = ErnieModel(src_ids=src_ids,
                       position_ids=pos_ids,
                       sentence_ids=sent_ids,
                       task_ids=task_ids,
                       input_mask=input_mask,
                       config=ernie_config,
                       use_fp16=args.use_fp16)

    enc_out = ernie.get_sequence_output()
    enc_out = fluid.layers.dropout(x=enc_out,
                                   dropout_prob=0.1,
                                   dropout_implementation="upscale_in_train")
    # Per-token label logits: [batch, seq, num_labels].
    logits = fluid.layers.fc(
        input=enc_out,
        size=args.num_labels,
        num_flatten_dims=2,
        param_attr=fluid.ParamAttr(
            name="cls_seq_label_out_w",
            initializer=fluid.initializer.TruncatedNormal(scale=0.02)),
        bias_attr=fluid.ParamAttr(name="cls_seq_label_out_b",
                                  initializer=fluid.initializer.Constant(0.)))
    infers = fluid.layers.argmax(logits, axis=2)

    ret_labels = fluid.layers.reshape(x=labels, shape=[-1, 1])
    ret_infers = fluid.layers.reshape(x=infers, shape=[-1, 1])

    # Strip padding before chunk evaluation.
    lod_labels = fluid.layers.sequence_unpad(labels, seq_lens)
    lod_infers = fluid.layers.sequence_unpad(infers, seq_lens)

    # Chunk-type count implied by the label count and the tagging scheme.
    (_, _, _, num_infer, num_label, num_correct) = fluid.layers.chunk_eval(
        input=lod_infers,
        label=lod_labels,
        chunk_scheme=args.chunk_scheme,
        num_chunk_types=((args.num_labels - 1) //
                         (len(args.chunk_scheme) - 1)))

    # Flatten to [batch*seq, ...] for token-level softmax cross-entropy;
    # multiplying by the flattened mask zeroes the loss on padding tokens
    # (the mean still averages over all positions, padded ones included).
    labels = fluid.layers.flatten(labels, axis=2)
    ce_loss, probs = fluid.layers.softmax_with_cross_entropy(
        logits=fluid.layers.flatten(logits, axis=2),
        label=labels,
        return_softmax=True)
    input_mask = fluid.layers.flatten(input_mask, axis=2)
    ce_loss = ce_loss * input_mask
    loss = fluid.layers.mean(x=ce_loss)

    # Static fp16 loss scaling (unscaled later in the optimizer).
    if args.use_fp16 and args.loss_scaling > 1.0:
        loss *= args.loss_scaling

    graph_vars = {
        "loss": loss,
        "probs": probs,
        "labels": ret_labels,
        "infers": ret_infers,
        "num_infer": num_infer,
        "num_label": num_label,
        "num_correct": num_correct,
        "seq_lens": seq_lens
    }

    # persistable=True keeps these tensors out of memory-optimization reuse.
    for k, v in graph_vars.items():
        v.persistable = True

    return pyreader, graph_vars
Exemplo n.º 15
0
def create_model(args,
                 pyreader_name,
                 ernie_config,
                 is_prediction=False,
                 task_name="",
                 is_classify=False,
                 is_regression=False,
                 ernie_version="1.0"):
    """Build an ERNIE fine-tuning graph for classification or regression.

    The classification path additionally runs a GAT over the ERNIE token
    representations and a dependency adjacency matrix, and concatenates the
    head-word representation onto the pooled [CLS] feature.

    Returns:
        (pyreader, probs, feed_targets_name) when ``is_prediction`` is True,
        otherwise (pyreader, graph_vars).
    """
    # Fail fast: exactly one of is_classify / is_regression must be set,
    # otherwise no reader would be created and the code below would break.
    assert is_classify != is_regression, \
        'is_classify or is_regression must be true and only one of them can be true'

    if is_classify:
        # Classification additionally feeds the adjacency matrix
        # ([-1, seq, seq]) and the head-word indices ([-1, 2]) for the GAT.
        pyreader = fluid.layers.py_reader(capacity=50,
                                          shapes=[[-1, args.max_seq_len, 1], [-1, args.max_seq_len, 1],
                                                  [-1, args.max_seq_len, 1], [-1, args.max_seq_len, 1],
                                                  [-1, args.max_seq_len, 1], [-1, 1], [-1, 1],
                                                  [-1, args.max_seq_len, args.max_seq_len], [-1, 2]],
                                          dtypes=[
                                              'int64', 'int64', 'int64', 'int64', 'float32', 'int64', 'int64', 'int64',
                                              'int64'
                                          ],
                                          lod_levels=[0, 0, 0, 0, 0, 0, 0, 0, 0],
                                          name=task_name + "_" + pyreader_name,
                                          use_double_buffer=True)
        (src_ids, sent_ids, pos_ids, task_ids, input_mask, labels, qids,
         adj_mat, head_ids) = fluid.layers.read_file(pyreader)
    else:
        pyreader = fluid.layers.py_reader(capacity=50,
                                          shapes=[[-1, args.max_seq_len, 1], [-1, args.max_seq_len, 1],
                                                  [-1, args.max_seq_len, 1], [-1, args.max_seq_len, 1],
                                                  [-1, args.max_seq_len, 1], [-1, 1], [-1, 1]],
                                          dtypes=['int64', 'int64', 'int64', 'int64', 'float32', 'float32', 'int64'],
                                          lod_levels=[0, 0, 0, 0, 0, 0, 0],
                                          name=task_name + "_" + pyreader_name,
                                          use_double_buffer=True)
        # BUGFIX: the regression reader only produces 7 fields; the original
        # code unconditionally unpacked 9 and crashed in regression mode.
        (src_ids, sent_ids, pos_ids, task_ids, input_mask, labels,
         qids) = fluid.layers.read_file(pyreader)
        adj_mat = head_ids = None

    ernie = ErnieModel(src_ids=src_ids,
                       position_ids=pos_ids,
                       sentence_ids=sent_ids,
                       task_ids=task_ids,
                       input_mask=input_mask,
                       config=ernie_config,
                       use_fp16=args.use_fp16)

    ernie_output = ernie.get_sequence_output()
    cls_feats = ernie.get_pooled_output()

    if adj_mat is not None:
        # Feed the ERNIE token representations and the adjacency matrix into
        # a GAT to obtain a structure-aware sentence representation.
        gat = gnn.GAT(input_size=768, hidden_size=100, output_size=50, dropout=0.0, alpha=0.1, heads=12, layer=2)
        gat_emb = gat.forward(ernie_output, adj_mat)
        # Pick out the head-word representation.
        gat_emb = utils.index_sample(gat_emb, head_ids)
        # Concatenate [CLS] with the head-word representation for the
        # downstream classifier.
        cls_feats = fluid.layers.concat([cls_feats, gat_emb], axis=1)

    cls_feats = fluid.layers.dropout(x=cls_feats, dropout_prob=0.1, dropout_implementation="upscale_in_train")
    logits = fluid.layers.fc(input=cls_feats,
                             size=args.num_labels,
                             param_attr=fluid.ParamAttr(name=task_name + "_cls_out_w",
                                                        initializer=fluid.initializer.TruncatedNormal(scale=0.02)),
                             bias_attr=fluid.ParamAttr(name=task_name + "_cls_out_b",
                                                       initializer=fluid.initializer.Constant(0.)))

    if is_prediction:
        probs = fluid.layers.softmax(logits)
        feed_targets_name = [src_ids.name, sent_ids.name, pos_ids.name, input_mask.name]
        if ernie_version == "2.0":
            feed_targets_name += [task_ids.name]
        return pyreader, probs, feed_targets_name

    num_seqs = fluid.layers.create_tensor(dtype='int64')
    if is_classify:
        ce_loss, probs = fluid.layers.softmax_with_cross_entropy(logits=logits, label=labels, return_softmax=True)
        loss = fluid.layers.mean(x=ce_loss)
        accuracy = fluid.layers.accuracy(input=probs, label=labels, total=num_seqs)
        graph_vars = {
            "loss": loss,
            "probs": probs,
            "accuracy": accuracy,
            "labels": labels,
            "num_seqs": num_seqs,
            "qids": qids
        }
    else:
        # Regression: mean squared error on the raw logits.
        cost = fluid.layers.square_error_cost(input=logits, label=labels)
        loss = fluid.layers.mean(x=cost)
        graph_vars = {"loss": loss, "probs": logits, "labels": labels, "num_seqs": num_seqs, "qids": qids}

    return pyreader, graph_vars
Exemplo n.º 16
0
def create_model(args, phase, micro_bsz, dp_sharding_rank, dp_worldsize, topo):
    """Build the ERNIE pretraining graph (MLM, optionally + SOP) for
    hybrid data/sharding/pipeline-parallel training.

    Input layers and the data loader are pinned to the first pipeline
    stage ("gpu:0"); the loss heads are placed on the last stage
    ("gpu:{args.num_pp-1}"). Returns a dict containing the data_loader,
    the loss tensors and the encoder recompute checkpoints.
    """
    # The SOP (sentence-order prediction) variant uses a different dataset
    # reader that also yields pairwise sentence-order labels.
    if args.use_sop:
        from reader.pretraining_ds_ernie_full_sent import make_pretrain_dataset
    else:
        from reader.pretraining_ds_mlm import make_pretrain_dataset

    # mask_label, mask_pos for mlm, labels for sop
    if args.use_sop:
        input_fields = {
            'names':
            ['src_ids', 'sent_ids', 'mask_label', 'mask_pos', 'labels'],
            'shapes': [[-1, args.max_seq_len, 1], [-1, args.max_seq_len, 1],
                       [-1, 1], [-1, 1], [-1, 1]],
            'dtypes': ['int64', 'int64', 'int64', 'int64', 'int64'],
            'lod_levels': [0, 0, 0, 0, 0],
        }
    else:
        input_fields = {
            'names': ['src_ids', 'sent_ids', 'mask_label', 'mask_pos'],
            'shapes': [[-1, args.max_seq_len, 1], [-1, args.max_seq_len, 1],
                       [-1, 1], [-1, 1]],
            'dtypes': ['int64', 'int64', 'int64', 'int64'],
            'lod_levels': [0, 0, 0, 0],
        }

    # All input layers must live on the first pipeline stage.
    with fluid.device_guard("gpu:0"):
        inputs = [
            fluid.data(name=input_fields['names'][i],
                       shape=input_fields['shapes'][i],
                       dtype=input_fields['dtypes'][i],
                       lod_level=input_fields['lod_levels'][i])
            for i in range(len(input_fields['names']))
        ]
    if args.use_sop:
        (src_ids, sent_ids, mask_label, mask_pos, labels) = inputs
    else:
        (src_ids, sent_ids, mask_label, mask_pos) = inputs
    train_file_list = glob.glob(args.data_dir + "/*")
    # Vocab file format: one "<token>\t<id>" per line.
    vocab = {}
    with open(args.vocab_file) as r:
        for line in r:
            lines = line.strip().split('\t')
            vocab[lines[0]] = int(lines[1])

    log.debug("========= worker: {} of {} ==========".format(
        dp_sharding_rank, dp_worldsize))

    # Each data-parallel/sharding rank reads its own shard of the data.
    data_reader = make_pretrain_dataset('pt', train_file_list, True, vocab,
                                        micro_bsz, len(vocab),
                                        args.max_seq_len, dp_sharding_rank,
                                        dp_worldsize)
    with fluid.device_guard("gpu:0"):
        data_loader = fluid.io.DataLoader.from_generator(feed_list=inputs,
                                                         capacity=70,
                                                         iterable=False)
    places = fluid.CUDAPlace(int(os.environ.get('FLAGS_selected_gpus', 0)))

    def data_gen():
        yield from data_reader

    data_loader.set_batch_generator(data_gen, places)

    ernie_config = ErnieConfig(args.ernie_config_file)._config_dict
    ernie_config["preln"] = args.preln

    weight_sharing = (topo.mp.size == 1 and topo.pp.size == 1
                      )  # pp mp should not do weight sharing
    with fluid.device_guard("gpu:0"):
        ernie = ErnieModel(src_ids,
                           sent_ids,
                           ernie_config,
                           weight_sharing=weight_sharing,
                           topo=topo)
    checkpoints = ernie._checkpoints
    # Drop the last checkpoint — presumably it overlaps the loss-head
    # computation on the final stage; TODO(review): confirm.
    checkpoints.pop(-1)

    # Loss heads run on the last pipeline stage.
    with fluid.device_guard(f'gpu:{args.num_pp-1}'):
        mask_lm_loss, mean_mask_lm_loss = ernie.get_lm_output(
            mask_label, mask_pos)
        total_loss = mean_mask_lm_loss

        if args.use_sop:
            sop_acc, mean_sop_loss = ernie.get_next_sentence_output(labels)
            total_loss += mean_sop_loss

        if topo.pp.size > 1:
            # Pipeline parallelism: keep the loss tensors alive so they can
            # be fetched across stage boundaries.
            mask_lm_loss.persistable = True
            mean_mask_lm_loss.persistable = True
            # checkpoints.extend([mask_lm_loss.name, mean_mask_lm_loss.name])
            if args.use_sop:
                mean_sop_loss.persistable = True
                sop_acc.persistable = True
                # checkpoints.extend([mean_sop_loss.name, sop_acc.name])
            total_loss.persistable = True
            # checkpoints.append(total_loss.name)

    if args.use_sop:
        graph_vars = {
            'data_loader': data_loader,
            'mask_lm_loss': mask_lm_loss,
            'mean_mask_lm_loss': mean_mask_lm_loss,
            'sop_loss': mean_sop_loss,
            'sop_acc': sop_acc,
            'total_loss': total_loss,
            'checkpoints': checkpoints
        }
    else:
        graph_vars = {
            'data_loader': data_loader,
            'mask_lm_loss': mask_lm_loss,
            'mean_mask_lm_loss': mean_mask_lm_loss,
            'total_loss': total_loss,
            'checkpoints': checkpoints,
        }
    return graph_vars
Exemplo n.º 17
0
def create_model(args, pyreader_name, ernie_config):
    """Build a per-token multi-label tagging graph on top of ERNIE.

    Every token gets ``args.num_labels`` independent sigmoid outputs trained
    with binary cross-entropy; padding positions are zeroed out of the loss
    via ``input_mask``.

    Returns:
        (pyreader, graph_vars) where graph_vars holds the loss plus the
        unpadded (LoD) logits/labels and bookkeeping tensors for evaluation.
    """
    pyreader = fluid.layers.py_reader(
        capacity=50,  # number of batches buffered by the reader
        shapes=[[-1, args.max_seq_len, 1], [-1, args.max_seq_len, 1],
                [-1, args.max_seq_len, 1], [-1, args.max_seq_len, 1],
                [-1, args.max_seq_len, 1],
                [-1, args.max_seq_len, args.num_labels], [-1, 1], [-1, 1],
                [-1, args.max_seq_len, 1], [-1, args.max_seq_len, 1]],
        dtypes=[
            'int64', 'int64', 'int64', 'int64', 'float32', 'float32', 'int64',
            'int64', 'int64', 'int64'
        ],
        lod_levels=[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        name=pyreader_name,
        use_double_buffer=True)

    (src_ids, sent_ids, pos_ids, task_ids, input_mask, labels, seq_lens,
     example_index, tok_to_orig_start_index,
     tok_to_orig_end_index) = fluid.layers.read_file(pyreader)

    # Embedding + transformer encoder.
    ernie = ErnieModel(src_ids=src_ids,
                       position_ids=pos_ids,
                       sentence_ids=sent_ids,
                       task_ids=task_ids,
                       input_mask=input_mask,
                       config=ernie_config,
                       use_fp16=args.use_fp16)

    # Top-layer token representations.
    enc_out = ernie.get_sequence_output()
    enc_out = fluid.layers.dropout(x=enc_out,
                                   dropout_prob=0.1,
                                   dropout_implementation="upscale_in_train")
    logits = fluid.layers.fc(
        input=enc_out,
        size=args.num_labels,
        num_flatten_dims=2,
        param_attr=fluid.ParamAttr(
            name="cls_seq_label_out_w",
            initializer=fluid.initializer.TruncatedNormal(scale=0.02)),
        bias_attr=fluid.ParamAttr(name="cls_seq_label_out_b",
                                  initializer=fluid.initializer.Constant(0.)))
    logits = fluid.layers.sigmoid(logits)
    # BUGFIX: sigmoid saturates to exactly 0/1 in fp32 for large |x|, which
    # would make the log()s below produce -inf/NaN losses; clip first.
    logits = fluid.layers.clip(logits, min=1e-7, max=1.0 - 1e-7)

    # Unpadded (LoD) views used for evaluation/decoding.
    lod_labels = fluid.layers.sequence_unpad(labels, seq_lens)
    lod_logits = fluid.layers.sequence_unpad(logits, seq_lens)
    lod_tok_to_orig_start_index = fluid.layers.sequence_unpad(
        tok_to_orig_start_index, seq_lens)
    lod_tok_to_orig_end_index = fluid.layers.sequence_unpad(
        tok_to_orig_end_index, seq_lens)

    labels = fluid.layers.flatten(labels, axis=2)
    logits = fluid.layers.flatten(logits, axis=2)
    input_mask = fluid.layers.flatten(input_mask, axis=2)

    # Element-wise binary cross-entropy across the num_labels outputs.
    log_logits = fluid.layers.log(logits)
    log_logits_neg = fluid.layers.log(1 - logits)
    ce_loss = 0. - labels * log_logits - (1 - labels) * log_logits_neg

    # Average over labels, mask out padding positions, then take the mean.
    ce_loss = fluid.layers.reduce_mean(ce_loss, dim=1, keep_dim=True)
    ce_loss = ce_loss * input_mask
    loss = fluid.layers.mean(x=ce_loss)

    graph_vars = {
        "inputs": src_ids,
        "loss": loss,
        "seqlen": seq_lens,
        "lod_logit": lod_logits,
        "lod_label": lod_labels,
        "example_index": example_index,
        "tok_to_orig_start_index": lod_tok_to_orig_start_index,
        "tok_to_orig_end_index": lod_tok_to_orig_end_index
    }

    # Keep fetched vars from being pruned by memory optimization.
    for k, v in graph_vars.items():
        v.persistable = True

    return pyreader, graph_vars
Exemplo n.º 18
0
def create_model(pyreader_name, ernie_config, task_group):
    """Build the multi-task ERNIE pretraining graph (MLM + per-task heads).

    NOTE(review): relies on module-level globals ``args`` and ``bsz``;
    ``pyreader_name`` is accepted but not used here. Returns
    (fetch_vars, names) where fetch_vars["graph_vars"] is
    [mask_lm_loss, lm_weight, <task_acc, task_weight>...,
    total_constract_loss, total_loss].
    """
    ## get input
    # Shapes use a fixed batch size `bsz` (append_batch_size=False below).
    shapes = [[bsz, args.max_seq_len, 1], [bsz, args.max_seq_len, 1],
              [bsz, args.max_seq_len, 1], [bsz, args.max_seq_len, 1],
              [bsz, args.max_seq_len, 1], [bsz, 1], [bsz, 1], [1], [bsz, 1],
              [bsz, 1], [bsz, 1]]
    names = [
        "src_ids", "pos_ids", "sent_ids", "task_ids", "input_mask",
        "mask_label", "mask_pos", "lm_weight", "batch_mask", "loss_mask",
        "gather_idx"
    ]
    dtypes = [
        "int64", "int64", "int64", "int64", "float32", "int64", "int64",
        "float32", "float32", "float32", "int64"
    ]
    # Number of general (non-task-specific) inputs; used to split `inputs`.
    cnt_general_input = len(shapes)

    # Each task contributes a per-example label and a scalar task weight.
    for index, task in enumerate(task_group):
        shapes.extend([[bsz, 1], [1]])
        names.extend(['task_label_' + str(index), 'task_weight_' + str(index)])
        dtypes.extend(["int64", "float32"])

    assert len(shapes) == len(names) == len(
        dtypes), "The three fields must have same size"
    inputs = []
    for i in range(len(shapes)):
        inputs.append(
            fluid.layers.data(name=names[i],
                              shape=shapes[i],
                              dtype=dtypes[i],
                              append_batch_size=False))

    general_data, task_params = inputs[:cnt_general_input], inputs[
        cnt_general_input:]
    src_ids, pos_ids, sent_ids, task_ids, input_mask, \
                  mask_label, mask_pos, lm_weight, batch_mask, loss_mask, gather_idx = general_data

    ## build graph
    ernie = ErnieModel(src_ids=src_ids,
                       position_ids=pos_ids,
                       sentence_ids=sent_ids,
                       task_ids=task_ids,
                       input_mask=input_mask,
                       config=ernie_config,
                       weight_sharing=args.weight_sharing,
                       use_fp16=args.use_amp)

    # Masked-LM loss, weighted by the scalar lm_weight feed.
    mask_lm_loss = ernie.get_lm_output(mask_label, mask_pos)
    checkpoints = ernie.get_checkpoints()
    total_loss = mask_lm_loss * lm_weight
    graph_vars = [mask_lm_loss, lm_weight]

    # Walk task_params two at a time: (task_label_i, task_weight_i).
    index = 0
    total_constract_loss = 0
    for task in task_group:
        task_labels = task_params[index]
        task_weight = task_params[index + 1]
        task_loss, task_acc = ernie.get_task_output(task, task_labels,
                                                    gather_idx)
        total_loss += task_loss * task_weight * task["loss_weight"]
        # "constart"/contrastive: tasks may add a contrastive loss term.
        if task["constart"]:
            contract_loss = ernie.get_contrastive_loss(batch_mask, loss_mask)
            total_loss += contract_loss * task_weight
            total_constract_loss += contract_loss * task_weight
        graph_vars.extend([task_acc, task_weight])
        index += 2

    ## build output
    graph_vars.append(total_constract_loss)
    graph_vars.append(total_loss)
    #for var in graph_vars:
    #    var.persistable = True

    fetch_vars = {"graph_vars": graph_vars, "checkpoints": checkpoints}

    return fetch_vars, names
def create_model(ernie_config, is_training=False):
    """Build the span-prediction (SQuAD-style MRC) graph on top of ERNIE.

    Returns:
        Training mode: (data_loader, total_loss, num_seqs).
        Inference mode: (data_loader, unique_id, start_logits, end_logits,
        num_seqs).
    """
    # Training feeds gold start/end positions; inference feeds a unique id
    # used to match predictions back to their examples.
    if is_training:
        names = ['src_ids', 'pos_ids', 'sent_ids', 'input_mask',
                 'start_positions', 'end_positions']
        shapes = [[None, None], [None, None], [None, None],
                  [None, None, 1], [None, 1], [None, 1]]
        dtypes = ['int64', 'int64', 'int64', 'float32', 'int64', 'int64']
    else:
        names = ['src_ids', 'pos_ids', 'sent_ids', 'input_mask', 'unique_id']
        shapes = [[None, None], [None, None], [None, None],
                  [None, None, 1], [None, 1]]
        dtypes = ['int64', 'int64', 'int64', 'float32', 'int64']

    feeds = [
        fluid.data(name=n, shape=s, dtype=d, lod_level=0)
        for n, s, d in zip(names, shapes, dtypes)
    ]

    loader = fluid.io.DataLoader.from_generator(feed_list=feeds,
                                                capacity=50,
                                                iterable=False)

    if is_training:
        (src_ids, pos_ids, sent_ids, input_mask,
         start_positions, end_positions) = feeds
    else:
        src_ids, pos_ids, sent_ids, input_mask, unique_id = feeds

    ernie = ErnieModel(src_ids=src_ids,
                       position_ids=pos_ids,
                       sentence_ids=sent_ids,
                       input_mask=input_mask,
                       config=ernie_config,
                       use_fp16=args.use_fp16)

    seq_out = ernie.get_sequence_output()

    # One FC over every token position, producing 2 logits: [start, end].
    span_logits = fluid.layers.fc(
        input=seq_out,
        size=2,
        num_flatten_dims=2,
        param_attr=fluid.ParamAttr(
            name="cls_squad_out_w",
            initializer=fluid.initializer.TruncatedNormal(scale=0.02)),
        bias_attr=fluid.ParamAttr(name="cls_squad_out_b",
                                  initializer=fluid.initializer.Constant(0.)))

    # [batch, seq, 2] -> [2, batch, seq], then split into the two heads.
    span_logits = fluid.layers.transpose(x=span_logits, perm=[2, 0, 1])
    start_logits, end_logits = fluid.layers.unstack(x=span_logits, axis=0)

    # Count sequences in the batch by summing a batch-sized tensor of ones.
    ones = fluid.layers.fill_constant_batch_size_like(input=start_logits,
                                                      dtype='int64',
                                                      shape=[1],
                                                      value=1)
    num_seqs = fluid.layers.reduce_sum(input=ones)

    if not is_training:
        return loader, unique_id, start_logits, end_logits, num_seqs

    def _mean_ce(head_logits, positions):
        # Softmax cross-entropy over the sequence axis, averaged over batch.
        ce = fluid.layers.softmax_with_cross_entropy(logits=head_logits,
                                                     label=positions)
        return fluid.layers.mean(x=ce)

    # Total loss is the average of the start- and end-position losses.
    total_loss = (_mean_ce(start_logits, start_positions) +
                  _mean_ce(end_logits, end_positions)) / 2.0
    return loader, total_loss, num_seqs
Exemplo n.º 20
0
def create_model(pyreader_name, ernie_config, task_group):
    """Build the multi-task ERNIE pretraining graph (MLM + per-task heads)
    fed through a DataLoader.

    NOTE(review): relies on module-level global ``args``; ``pyreader_name``
    is accepted but unused (the loader is built from the explicit feed list).

    Returns:
        (pyreader, fetch_vars) where fetch_vars["graph_vars"] is
        [mask_lm_loss, lm_weight, <task_acc, task_weight>...,
        total_constract_loss, total_loss] and fetch_vars["checkpoints"]
        holds the encoder recompute checkpoints.
    """
    src_ids = fluid.layers.data(name='src_ids',
            shape=[-1, args.max_seq_len, 1], dtype='int64')
    pos_ids = fluid.layers.data(name='pos_ids',
            shape=[-1, args.max_seq_len, 1], dtype='int64')
    sent_ids= fluid.layers.data(name='sent_ids',
            shape=[-1, args.max_seq_len, 1], dtype='int64')
    task_ids= fluid.layers.data(name='task_ids',
            shape=[-1, args.max_seq_len, 1], dtype='int64')
    input_mask = fluid.layers.data(name='input_mask',
            shape=[-1, args.max_seq_len, args.max_seq_len], dtype='float32')
    mask_label = fluid.layers.data(name='mask_label',
            shape=[-1, 1], dtype='int64')
    mask_pos = fluid.layers.data(name='mask_pos',
            shape=[-1, 1], dtype='int64')
    lm_weight = fluid.layers.data(name='lm_weight',
            shape=[1], dtype='float32', append_batch_size=False)
    batch_mask = fluid.layers.data(name='batch_mask',
            shape=[-1, 1], dtype='float32')
    loss_mask = fluid.layers.data(name="loss_mask",
            shape=[-1, 1], dtype='float32')
    gather_idx = fluid.layers.data(name="gather_idx",
            shape=[-1, 1], dtype='int64')

    # Each task contributes a per-example label and a scalar task weight.
    task_params_all = []
    for index, task in enumerate(task_group):
        name_label = 'task_label_' + str(index)
        name_weight = 'task_weight_' + str(index)
        task_label = fluid.layers.data(name=name_label,
            shape=[-1, 1], dtype='int64')
        task_weight = fluid.layers.data(name=name_weight,
            shape=[1], dtype='float32', append_batch_size=False)
        task_params_all.extend([task_label, task_weight])

    fluid.reader.keep_data_loader_order(False)
    # Keep the general inputs in one list so the task-parameter offset below
    # can be derived instead of hard-coded.
    general_inputs = [src_ids, pos_ids, sent_ids, task_ids, input_mask,
                      mask_label, mask_pos, lm_weight, batch_mask, loss_mask,
                      gather_idx]
    feed_list = general_inputs + task_params_all
    pyreader = fluid.io.DataLoader.from_generator(
            feed_list=feed_list,
            capacity=70, iterable=False)

    ernie = ErnieModel(
        src_ids=src_ids,
        position_ids=pos_ids,
        sentence_ids=sent_ids,
        task_ids=task_ids,
        input_mask=input_mask,
        config=ernie_config,
        weight_sharing=args.weight_sharing,
        use_fp16=args.use_amp)

    # Masked-LM loss, weighted by the scalar lm_weight feed.
    mask_lm_loss = ernie.get_lm_output(mask_label, mask_pos)
    checkpoints = ernie.get_checkpoints()

    total_loss = mask_lm_loss * lm_weight
    graph_vars = [mask_lm_loss, lm_weight]
    # First task_label_* slot in feed_list (was a hard-coded magic 11,
    # which would silently break if the general inputs changed).
    index = len(general_inputs)
    total_constract_loss = 0
    for task in task_group:
        task_labels = feed_list[index]
        task_weight = feed_list[index + 1]
        task_loss, task_acc = ernie.get_task_output(task, task_labels, gather_idx)
        total_loss += task_loss * task_weight * task["loss_weight"]
        # "constart"/contrastive: tasks may add a contrastive loss term.
        if task["constart"]:
            contract_loss = ernie.get_contrastive_loss(batch_mask, loss_mask)
            total_loss += contract_loss * task_weight
            total_constract_loss += contract_loss * task_weight
        graph_vars.extend([task_acc, task_weight])
        index += 2

    # Keep fetched vars from being pruned by memory optimization.
    graph_vars.append(total_constract_loss)
    graph_vars.append(total_loss)
    for var in graph_vars:
        var.persistable = True

    fetch_vars = {"graph_vars": graph_vars,
                  "checkpoints": checkpoints}

    return pyreader, fetch_vars
Exemplo n.º 21
0
def create_model(args,
                 pyreader_name,
                 ernie_config,
                 batch_size=16,
                 is_prediction=False,
                 task_name=""):
    """Dual-encoder retrieval model: separate ERNIE towers encode the query
    and the title/para, scored by dot product with in-batch negatives.

    The [batch, batch] score matrix is trained against an identity matrix
    as soft labels, i.e. each query's positive is the passage at the same
    batch index and every other passage acts as a negative.

    Returns (pyreader, graph_vars).
    """
    # Feeds: 5 query tensors, 5 passage tensors, then labels and qids.
    pyreader = fluid.layers.py_reader(
        capacity=50,
        shapes=[[batch_size, args.q_max_seq_len, 1], [batch_size, args.q_max_seq_len, 1],
            [batch_size, args.q_max_seq_len, 1], [batch_size, args.q_max_seq_len, 1],
            [batch_size, args.q_max_seq_len, 1],
            [batch_size, args.p_max_seq_len, 1], [batch_size, args.p_max_seq_len, 1],
            [batch_size, args.p_max_seq_len, 1], [batch_size, args.p_max_seq_len, 1],
            [batch_size, args.p_max_seq_len, 1],
            [batch_size, 1], [batch_size, 1]],
    dtypes=['int64', 'int64', 'int64', 'int64', 'float32',
            'int64', 'int64', 'int64', 'int64', 'float32',
            'int64', 'int64'],
    lod_levels=[0, 0, 0, 0, 0,  0, 0, 0, 0, 0,  0, 0],
    name=pyreader_name,
    use_double_buffer=True)

    (src_ids_q, sent_ids_q, pos_ids_q, task_ids_q, input_mask_q,
     src_ids_p, sent_ids_p, pos_ids_p, task_ids_p, input_mask_p,
     labels, qids) = fluid.layers.read_file(pyreader)

    # Query tower (parameters prefixed 'query_').
    ernie_q = ErnieModel(
        src_ids=src_ids_q,
        position_ids=pos_ids_q,
        sentence_ids=sent_ids_q,
        task_ids=task_ids_q,
        input_mask=input_mask_q,
        config=ernie_config,
        model_name='query_')
    ## pos para
    # Passage tower (parameters prefixed 'titlepara_').
    ernie_p = ErnieModel(
        src_ids=src_ids_p,
        position_ids=pos_ids_p,
        sentence_ids=sent_ids_p,
        task_ids=task_ids_p,
        input_mask=input_mask_p,
        config=ernie_config,
        model_name='titlepara_')

    q_cls_feats = ernie_q.get_cls_output()
    p_cls_feats = ernie_p.get_cls_output()
    #p_cls_feats = fluid.layers.concat([pos_cls_feats, neg_cls_feats], axis=0)
    #src_ids_p = fluid.layers.Print(src_ids_p, message='p: ')
    #p_cls_feats = fluid.layers.Print(p_cls_feats, message='p: ')

    #multiply
    # Q . P^T gives a [batch, batch] similarity matrix: row i holds query
    # i's scores against every passage in the batch.
    logits = fluid.layers.matmul(q_cls_feats, p_cls_feats, transpose_x=False, transpose_y=True)
    probs = logits
    #fluid.layers.Print(probs, message='probs: ')
    #logits2 = fluid.layers.elementwise_mul(x=q_rep, y=p_rep)
    #fluid.layers.Print(logits2, message='logits2: ')
    #probs2 = fluid.layers.reduce_sum(logits, dim=-1)
    #fluid.layers.Print(probs2, message='probs2: ')

    # Identity matrix as soft labels: the diagonal marks each query's
    # in-batch positive passage.
    matrix_labels = fluid.layers.eye(batch_size, batch_size, dtype='float32')
    matrix_labels.stop_gradient=True

    #print('DEBUG:\tstart loss')
    ce_loss, _ = fluid.layers.softmax_with_cross_entropy(
           logits=logits, label=matrix_labels, soft_label=True, return_softmax=True)
    loss = fluid.layers.mean(x=ce_loss)
    #print('DEBUG:\tloss done')

    # argmax over the identity recovers hard labels [0..batch_size) for the
    # accuracy metric below.
    matrix_labels = fluid.layers.argmax(matrix_labels, axis=-1)
    matrix_labels = fluid.layers.reshape(x=matrix_labels, shape=[batch_size, 1])
    num_seqs = fluid.layers.create_tensor(dtype='int64')
    accuracy = fluid.layers.accuracy(input=probs, label=matrix_labels, total=num_seqs)

    #ce_loss, probs = fluid.layers.softmax_with_cross_entropy(
    #    logits=logits, label=labels, return_softmax=True)
    #loss = fluid.layers.mean(x=ce_loss)
    #accuracy = fluid.layers.accuracy(
    #    input=probs, label=labels, total=num_seqs)
    graph_vars = {
        "loss": loss,
        "probs": probs,
        "accuracy": accuracy,
        "labels": labels,
        "num_seqs": num_seqs,
        "qids": qids,
        "q_rep": q_cls_feats,
        "p_rep": p_cls_feats
    }

    return pyreader, graph_vars
Exemplo n.º 22
0
    def create_model(self, decoding=False):
        """Build the training graph for seq2seq infilling generation.

        When ``decoding`` is True, delegates to ``infilling_decode()`` and
        returns its result instead of (pyreader, graph_vars).
        """
        if decoding:
            return self.infilling_decode()

        # Dialog tasks carry one extra embedding-id stream (4 vs 3).
        if self.task_type == "dialog":
            emb_num = 4
        else:
            emb_num = 3
        # Context inputs: emb_num id tensors plus a [seq, seq] attention mask.
        input_shapes = [[-1, self.max_seq_len, 1]] * emb_num + \
                       [[-1, self.max_seq_len, self.max_seq_len]]
        # Query inputs attend over context + query, hence the doubled width.
        query_input_shapes = [[-1, self.max_seq_len, 1]] * emb_num + \
                             [[-1, self.max_seq_len, self.max_seq_len * 2]]
        input_dtypes = ['int64'] * emb_num + ['float32']
        input_lod_levels = [0] * emb_num + [0]
        # Full layout: context block, query block, then tgt_labels, tgt_pos.
        shapes = input_shapes + query_input_shapes + [[-1, 1], [-1, 1]]
        dtypes = input_dtypes * 2 + ['int64', 'int64']
        lod_levels = input_lod_levels * 2 + [0, 0]

        inputs = self.to_ternsor(shapes, dtypes, lod_levels)
        pyreader = fluid.io.DataLoader.from_generator(feed_list=inputs,
                                                      capacity=50,
                                                      iterable=False)

        # inputs layout: [ctx ids x emb_num, ctx mask,
        #                 query ids x emb_num, query mask, tgt_labels, tgt_pos]
        emb_ids = [{}, {}]
        for key, value in zip(self.emb_keys, inputs[:emb_num]):
            emb_ids[0][key] = value
        for key, value in zip(self.emb_keys,
                              inputs[emb_num + 1:emb_num * 2 + 1]):
            emb_ids[1][key] = value

        input_mask, input_query_mask = inputs[emb_num], inputs[2 * emb_num + 1]
        tgt_labels, tgt_pos = inputs[-2:]

        ernie = ErnieModel(emb_ids=emb_ids,
                           input_mask=[input_mask, input_query_mask],
                           config=self.ernie_config,
                           use_fp16=self.use_fp16,
                           task_type=self.task_type)

        # Gather the target-position representations and project to vocab.
        enc_out = ernie.get_sequence_output()
        fc_out = self.cal_logit(enc_out, tgt_pos)

        if self.label_smooth:
            # Smooth one-hot targets, then train with soft-label CE.
            out_size = self.ernie_config[
                "tgt_vocab_size"] or self.ernie_config['vocab_size']
            labels = fluid.layers.label_smooth(label=fluid.layers.one_hot(
                input=tgt_labels, depth=out_size),
                                               epsilon=self.label_smooth)

            ce_loss = layers.softmax_with_cross_entropy(logits=fc_out,
                                                        label=labels,
                                                        soft_label=True)
            #probs = fluid.layers.log(fluid.layers.softmax(fc_out))
            #ce_loss = fluid.layers.kldiv_loss(probs, labels, reduction='batchmean')
        else:
            ce_loss, probs = fluid.layers.softmax_with_cross_entropy(
                logits=fc_out, label=tgt_labels, return_softmax=True)

        loss = fluid.layers.mean(x=ce_loss)
        graph_vars = {"loss": loss}
        # Keep fetched vars from being pruned by memory optimization.
        for k, v in graph_vars.items():
            v.persistable = True

        return pyreader, graph_vars
Exemplo n.º 23
0
def create_model(args,
                 pyreader_name,
                 ernie_config,
                 is_prediction=False,
                 task_name="",
                 is_classify=False,
                 is_regression=False,
                 ernie_version="1.0"):
    """Build the ERNIE fine-tuning graph for sentence-level tasks.

    Exactly one of ``is_classify`` / ``is_regression`` must be True.

    Args:
        args: namespace providing at least ``max_seq_len``, ``num_labels``
            and ``use_fp16``.
        pyreader_name: unused here; kept for interface compatibility with
            the sibling ``create_model`` variants in this file.
        ernie_config: ERNIE model configuration.
        is_prediction: if True, build the inference graph and return the
            feed target names instead of the training graph.
        task_name: prefix for the classifier-head parameter names, so
            multiple tasks can coexist in one program.
        is_classify: build a softmax classification head.
        is_regression: build a regression (squared-error) head.
        ernie_version: "2.0" additionally feeds ``task_ids`` at inference.

    Returns:
        ``(pyreader, graph_vars)`` for training/eval, or
        ``(pyreader, probs, feed_targets_name)`` when ``is_prediction``.
    """
    # Validate the mode up front: `labels` below is only created when exactly
    # one of the two flags is set, so checking late (as a post-construction
    # assert) would surface as a confusing NameError at the DataLoader
    # feed_list instead of this explicit message.
    assert is_classify != is_regression, 'is_classify or is_regression must be true and only one of them can be true'

    src_ids = fluid.layers.data(name='eval_placeholder_0',
                                shape=[-1, args.max_seq_len, 1],
                                dtype='int64')
    sent_ids = fluid.layers.data(name='eval_placeholder_1',
                                 shape=[-1, args.max_seq_len, 1],
                                 dtype='int64')
    pos_ids = fluid.layers.data(name='eval_placeholder_2',
                                shape=[-1, args.max_seq_len, 1],
                                dtype='int64')
    input_mask = fluid.layers.data(name='eval_placeholder_3',
                                   shape=[-1, args.max_seq_len, 1],
                                   dtype='float32')
    task_ids = fluid.layers.data(name='eval_placeholder_4',
                                 shape=[-1, args.max_seq_len, 1],
                                 dtype='int64')
    qids = fluid.layers.data(name='eval_placeholder_5',
                             shape=[-1, 1],
                             dtype='int64')

    # Label dtype depends on the task: class indices vs. regression targets.
    if is_classify:
        labels = fluid.layers.data(name='6', shape=[-1, 1], dtype='int64')
    elif is_regression:
        labels = fluid.layers.data(name='6', shape=[-1, 1], dtype='float32')

    pyreader = fluid.io.DataLoader.from_generator(feed_list=[
        src_ids, sent_ids, pos_ids, task_ids, input_mask, labels, qids
    ],
                                                  capacity=70,
                                                  iterable=False)

    ernie = ErnieModel(src_ids=src_ids,
                       position_ids=pos_ids,
                       sentence_ids=sent_ids,
                       task_ids=task_ids,
                       input_mask=input_mask,
                       config=ernie_config,
                       use_fp16=args.use_fp16)

    # Pooled [CLS] representation -> dropout -> task-specific linear head.
    cls_feats = ernie.get_pooled_output()
    cls_feats = fluid.layers.dropout(x=cls_feats,
                                     dropout_prob=0.1,
                                     dropout_implementation="upscale_in_train")
    logits = fluid.layers.fc(
        input=cls_feats,
        size=args.num_labels,
        param_attr=fluid.ParamAttr(
            name=task_name + "_cls_out_w",
            initializer=fluid.initializer.TruncatedNormal(scale=0.02)),
        bias_attr=fluid.ParamAttr(name=task_name + "_cls_out_b",
                                  initializer=fluid.initializer.Constant(0.)))

    if is_prediction:
        # Inference path: expose probabilities (softmax for classification,
        # raw logits for regression) plus the names callers must feed.
        if is_classify:
            probs = fluid.layers.softmax(logits)
        else:
            probs = logits
        feed_targets_name = [
            src_ids.name, sent_ids.name, pos_ids.name, input_mask.name
        ]
        if ernie_version == "2.0":
            feed_targets_name += [task_ids.name]
        return pyreader, probs, feed_targets_name

    # `num_seqs` is filled by accuracy() below (classification only) so eval
    # code can recover the per-batch sample count.
    num_seqs = fluid.layers.create_tensor(dtype='int64')
    if is_classify:
        ce_loss, probs = fluid.layers.softmax_with_cross_entropy(
            logits=logits, label=labels, return_softmax=True)
        loss = fluid.layers.mean(x=ce_loss)
        accuracy = fluid.layers.accuracy(input=probs,
                                         label=labels,
                                         total=num_seqs)
        graph_vars = {
            "loss": loss,
            "probs": probs,
            "accuracy": accuracy,
            "labels": labels,
            "num_seqs": num_seqs,
            "qids": qids,
            "logits": logits  # add for middle state
        }
    elif is_regression:
        cost = fluid.layers.square_error_cost(input=logits, label=labels)
        loss = fluid.layers.mean(x=cost)
        graph_vars = {
            "loss": loss,
            "probs": logits,
            "labels": labels,
            "num_seqs": num_seqs,
            "qids": qids
        }
    else:
        # Unreachable after the entry assert; kept as a defensive guard.
        raise ValueError(
            'unsupported fine tune mode. only supported classify/regression')

    return pyreader, graph_vars
Exemplo n.º 24
0
    def infilling_decode(self):
        """Build the beam-search decoding graph for infilling generation.

        Constructs a DataLoader over the encoder inputs plus decoding
        tensors, runs the ERNIE encoder once with caching enabled
        (``decoding=True``), then unrolls a ``layers.While`` loop that at
        each step feeds the previously selected token together with an
        [ATTN] query token, applies a length-penalized beam search, and
        finally backtracks the beams.

        Returns:
            (pyreader, graph_vars) where graph_vars holds
            ``finished_ids`` / ``finished_scores`` (LoD tensors produced by
            ``beam_search_decode``) and the pass-through ``data_ids``.
        """
        # Dialog inputs carry an extra role embedding stream.
        if self.task_type == "dialog":
            emb_num = 4
        else:
            emb_num = 3
        input_shapes = [[-1, self.max_seq_len, 1]] * emb_num + \
                       [[-1, self.max_seq_len, self.max_seq_len]]
        input_dtypes = ['int64'] * emb_num + ['float32']
        input_lod_levels = [0] * emb_num + [0]

        # Encoder inputs followed by: tgt_ids, tgt_pos, init_scores,
        # parent_idx, tgt_input_mask, data_ids (see unpacking below).
        shapes = input_shapes + [[-1, self.max_seq_len, 1],
                                 [-1, self.max_seq_len, 1], [-1, 1], [-1],
                                 [-1, 1, self.max_seq_len], [-1, 1]]
        dtypes = input_dtypes + [
            'int64', 'int64', 'float32', 'int32', 'float32', 'int64'
        ]
        lod_levels = input_lod_levels + [2, 2, 2, 0, 0, 0]

        inputs = self.to_ternsor(shapes, dtypes, lod_levels)
        pyreader = fluid.io.DataLoader.from_generator(feed_list=inputs,
                                                      capacity=50,
                                                      iterable=False)

        emb_ids = {}
        for key, value in zip(self.emb_keys, inputs[:emb_num]):
            emb_ids[key] = value

        input_mask = inputs[emb_num]
        tgt_ids, tgt_pos, init_scores, parent_idx, tgt_input_mask, data_ids = inputs[
            -6:]

        # decoding=True enables incremental (cached) attention; parent_idx
        # re-orders the cache after each beam-search step.
        ernie = ErnieModel(emb_ids=emb_ids,
                           input_mask=input_mask,
                           config=self.ernie_config,
                           use_fp16=self.use_fp16,
                           task_type=self.task_type,
                           decoding=True,
                           gather_idx=parent_idx)

        # Loop bookkeeping: step_idx counts decoded tokens (array index),
        # pos_idx is the position id of the *next* token (one ahead).
        max_len = layers.fill_constant(shape=[1],
                                       dtype=tgt_ids.dtype,
                                       value=self.max_dec_len,
                                       force_cpu=True)
        step_idx = layers.fill_constant(shape=[1],
                                        dtype=tgt_ids.dtype,
                                        value=0,
                                        force_cpu=True)
        pos_idx = layers.fill_constant(shape=[1],
                                       dtype=tgt_ids.dtype,
                                       value=1,
                                       force_cpu=True)
        cond = layers.less_than(x=step_idx, y=max_len)
        while_op = layers.While(cond)

        # Per-step state kept in LoDTensorArrays, indexed by step_idx.
        ids = layers.array_write(layers.reshape(tgt_ids, (-1, 1)), step_idx)
        pos_biases = layers.array_write(layers.reshape(tgt_pos, (-1, 1)),
                                        step_idx)
        scores = layers.array_write(init_scores, step_idx)
        tgt_masks = layers.array_write(tgt_input_mask, step_idx)

        with while_op.block():
            # Read previous step's state; gather re-aligns it with the
            # surviving beams (parent_idx from the last beam_search).
            pre_ids = layers.array_read(array=ids, i=step_idx)
            pre_ids = layers.reshape(pre_ids, (-1, 1, 1), inplace=True)
            pre_scores = layers.array_read(array=scores, i=step_idx)
            pos_bias = layers.array_read(array=pos_biases, i=step_idx)
            pos_bias = layers.gather(input=pos_bias, index=parent_idx)
            tmp_mask = layers.array_read(tgt_masks, i=step_idx)

            def gen_batch_like(value,
                               dtype="int64",
                               shape=[-1, 1, 1],
                               is_scalar=True):
                # Broadcast `value` to the current (beam-expanded) batch
                # size; is_scalar=False tiles a per-row tensor instead.
                if is_scalar:
                    return layers.fill_constant_batch_size_like(
                        input=parent_idx,
                        value=value,
                        shape=shape,
                        dtype=dtype)
                else:
                    return layers.elementwise_mul(
                        x=layers.fill_constant_batch_size_like(
                            input=parent_idx,
                            value=1,
                            shape=shape,
                            dtype=dtype),
                        y=value,
                        axis=0)

            # Grow the attention mask by one column: the previous token may
            # be attended (1), the query slot for it is blocked (0) in
            # pre_mask but open (1) in cur_mask.
            tmp_mask = layers.gather(input=tmp_mask, index=parent_idx)
            append_0_mask = gen_batch_like(0.0, dtype=tmp_mask.dtype)
            append_1_mask = gen_batch_like(1.0, dtype=tmp_mask.dtype)
            tmp_mask = layers.concat([tmp_mask, append_1_mask], axis=2)
            pre_mask = layers.concat([tmp_mask, append_0_mask], axis=2)
            cur_mask = layers.concat([tmp_mask, append_1_mask], axis=2)

            # Two-token step input: previous real token + [ATTN] query token.
            cur_ids = gen_batch_like(self.attn_id)
            pre_pos = gen_batch_like(step_idx, is_scalar=False)
            cur_pos = gen_batch_like(pos_idx, is_scalar=False)
            if self.continuous_position:
                # Shift positions so generation continues the source numbering.
                pre_pos = pre_pos + pos_bias
                cur_pos = cur_pos + pos_bias

            dec_emb_ids = {
                "word_embedding": layers.concat([pre_ids, cur_ids], axis=1),
                "pos_embedding": layers.concat([pre_pos, cur_pos], axis=1)
            }
            if self.task_type == "dialog":
                # NOTE(review): role/turn ids are hard-coded to 0 during
                # decoding — confirm this matches training-side conventions.
                role_ids = gen_batch_like(0)
                turn_ids = gen_batch_like(0)
                dec_emb_ids["role_embedding"] = layers.concat(
                    [role_ids, role_ids], axis=1)
                dec_emb_ids["turn_embedding"] = layers.concat(
                    [turn_ids, turn_ids], axis=1)
            else:
                sent_ids = gen_batch_like(self.tgt_type_id)
                dec_emb_ids["sent_embedding"] = layers.concat(
                    [sent_ids, sent_ids], axis=1)
            dec_mask = layers.concat([pre_mask, cur_mask], axis=1)

            # Incremental encode of the 2-token step; logits are taken from
            # the query ([ATTN]) position only.
            dec_out = ernie.encode(dec_emb_ids,
                                   dec_mask,
                                   parent_idx,
                                   remove_query=True)
            fc_out = self.cal_logit(dec_out[:, 1:, :], None)
            topk_scores, topk_indices = layers.topk(
                input=layers.softmax(fc_out), k=self.beam_size)
            # GNMT-style length penalty: ((5 + len) / 6) ** alpha.
            pre_lenpen = layers.pow(
                (5.0 + layers.cast(step_idx, pre_scores.dtype)) / 6.0,
                self.length_penalty)
            cur_lenpen = layers.pow(
                (5.0 + layers.cast(pos_idx, pre_scores.dtype)) / 6.0,
                self.length_penalty)
            accu_scores = layers.elementwise_add(x=layers.log(topk_scores),
                                                 y=pre_scores * pre_lenpen,
                                                 axis=0) / cur_lenpen
            topk_indices = layers.lod_reset(topk_indices, pre_ids)
            accu_scores = layers.lod_reset(accu_scores, pre_ids)
            selected_ids, selected_scores, gather_idx = layers.beam_search(
                pre_ids=pre_ids,
                pre_scores=pre_scores,
                ids=topk_indices,
                scores=accu_scores,
                beam_size=self.beam_size,
                end_id=self.eos_idx,
                return_parent_idx=True)

            # Persist this step's state for the next iteration.
            layers.increment(x=step_idx, value=1.0, in_place=True)
            layers.increment(x=pos_idx, value=1.0, in_place=True)
            layers.array_write(selected_ids, i=step_idx, array=ids)
            layers.array_write(selected_scores, i=step_idx, array=scores)
            layers.array_write(tmp_mask, i=step_idx, array=tgt_masks)
            layers.array_write(pos_bias, i=step_idx, array=pos_biases)

            # Propagate beam reordering to the attention cache and stop when
            # max length is reached or all beams have finished.
            layers.assign(gather_idx, parent_idx)
            length_cond = layers.less_than(x=step_idx, y=max_len)
            finish_cond = layers.logical_not(layers.is_empty(x=selected_ids))
            layers.logical_and(x=length_cond, y=finish_cond, out=cond)

        finished_ids, finished_scores = layers.beam_search_decode(
            ids, scores, beam_size=self.beam_size, end_id=self.eos_idx)

        graph_vars = {
            "finished_ids": finished_ids,
            "finished_scores": finished_scores,
            "data_ids": data_ids
        }

        # persistable=True keeps these out of memory-reuse optimization.
        for k, v in graph_vars.items():
            v.persistable = True

        return pyreader, graph_vars
Exemplo n.º 25
0
def create_model(args, pyreader_name, ernie_config, is_prediction=False):
    """Build the ERNIE sequence-labeling (token classification) graph.

    Args:
        args: namespace providing ``max_seq_len``, ``num_labels``,
            ``use_fp16`` and ``chunk_scheme`` (e.g. "IOB").
        pyreader_name: unused; kept for interface compatibility with the
            sibling ``create_model`` variants in this file.
        ernie_config: ERNIE model configuration.
        is_prediction: unused; kept for interface compatibility.

    Returns:
        (pyreader, graph_vars) where graph_vars exposes the loss, per-token
        probabilities, sequence lengths and chunk_eval counters.
    """
    src_ids = fluid.layers.data(name='1',
                                shape=[-1, args.max_seq_len, 1],
                                dtype='int64')
    sent_ids = fluid.layers.data(name='2',
                                 shape=[-1, args.max_seq_len, 1],
                                 dtype='int64')
    pos_ids = fluid.layers.data(name='3',
                                shape=[-1, args.max_seq_len, 1],
                                dtype='int64')
    task_ids = fluid.layers.data(name='4',
                                 shape=[-1, args.max_seq_len, 1],
                                 dtype='int64')
    input_mask = fluid.layers.data(name='5',
                                   shape=[-1, args.max_seq_len, 1],
                                   dtype='float32')
    labels = fluid.layers.data(name='7',
                               shape=[-1, args.max_seq_len, 1],
                               dtype='int64')
    seq_lens = fluid.layers.data(name='8', shape=[-1], dtype='int64')

    pyreader = fluid.io.DataLoader.from_generator(feed_list=[
        src_ids, sent_ids, pos_ids, task_ids, input_mask, labels, seq_lens
    ],
                                                  capacity=70,
                                                  iterable=False)

    ernie = ErnieModel(src_ids=src_ids,
                       position_ids=pos_ids,
                       sentence_ids=sent_ids,
                       task_ids=task_ids,
                       input_mask=input_mask,
                       config=ernie_config,
                       use_fp16=args.use_fp16)

    # Per-token representations -> dropout -> per-token label logits
    # (num_flatten_dims=2 keeps the [batch, seq] leading dims).
    enc_out = ernie.get_sequence_output()
    enc_out = fluid.layers.dropout(x=enc_out,
                                   dropout_prob=0.1,
                                   dropout_implementation="upscale_in_train")
    logits = fluid.layers.fc(
        input=enc_out,
        size=args.num_labels,
        num_flatten_dims=2,
        param_attr=fluid.ParamAttr(
            name="cls_seq_label_out_w",
            initializer=fluid.initializer.TruncatedNormal(scale=0.02)),
        bias_attr=fluid.ParamAttr(name="cls_seq_label_out_b",
                                  initializer=fluid.initializer.Constant(0.)))
    infers = fluid.layers.argmax(logits, axis=2)

    # sequence_unpad strips padding so chunk_eval sees true-length sequences.
    lod_labels = fluid.layers.sequence_unpad(labels, seq_lens)
    lod_infers = fluid.layers.sequence_unpad(infers, seq_lens)

    (_, _, _, num_infer, num_label, num_correct) = fluid.layers.chunk_eval(
        input=lod_infers,
        label=lod_labels,
        chunk_scheme=args.chunk_scheme,
        num_chunk_types=((args.num_labels - 1) //
                         (len(args.chunk_scheme) - 1)))

    labels = fluid.layers.flatten(labels, axis=2)
    ce_loss, probs = fluid.layers.softmax_with_cross_entropy(
        logits=fluid.layers.flatten(logits, axis=2),
        label=labels,
        return_softmax=True)
    # Zero out the loss on padded positions before averaging.
    # NOTE(review): the mean still divides by the total (padded) token count,
    # so the effective loss scale depends on padding; dividing by
    # sum(input_mask) instead would be padding-invariant — confirm intent.
    input_mask = fluid.layers.flatten(input_mask, axis=2)
    ce_loss = ce_loss * input_mask
    loss = fluid.layers.mean(x=ce_loss)

    graph_vars = {
        "inputs": src_ids,
        "loss": loss,
        "probs": probs,
        "seqlen": seq_lens,
        "num_infer": num_infer,
        "num_label": num_label,
        "num_correct": num_correct,
    }

    # persistable=True keeps these out of memory-reuse optimization.
    for k, v in graph_vars.items():
        v.persistable = True

    return pyreader, graph_vars
Exemplo n.º 26
0
def create_model(args,
                 pyreader_name,
                 ernie_config,
                 batch_size=16,
                 is_prediction=False,
                 task_name="",
                 fleet_handle=None):
    """Build a dual-encoder retrieval graph with in-batch negatives.

    A query tower and a shared title/para tower (used for both positive and
    negative passages) produce CLS embeddings; training maximizes the dot
    product between each query and its positive passage against all other
    passages in the (optionally cross-worker gathered) batch.

    Args:
        args: namespace providing ``q_max_seq_len``, ``p_max_seq_len`` and
            ``use_cross_batch``.
        pyreader_name: suffix of the py_reader name (prefixed by task_name).
        ernie_config: ERNIE model configuration shared by all three towers.
        batch_size: fixed per-device batch size; the reader shapes and the
            in-batch label construction both depend on it.
        is_prediction: if True, only score query vs. positive passage.
        task_name: prefix for the py_reader name.
        fleet_handle: distributed fleet handle; required for cross-batch
            negatives (all-gather of passage embeddings).

    Returns:
        (pyreader, graph_vars) at prediction time, or
        (pyreader, graph_vars, checkpoints) at training time.
    """
    print ("DEBUG:\tclassify")
    # 5 query tensors, 5 positive-passage tensors, 5 negative-passage
    # tensors, then labels and qids.
    pyreader = fluid.layers.py_reader(
        capacity=50,
        shapes=[[batch_size, args.q_max_seq_len, 1], [batch_size, args.q_max_seq_len, 1],
            [batch_size, args.q_max_seq_len, 1], [batch_size, args.q_max_seq_len, 1],
            [batch_size, args.q_max_seq_len, 1],
            [batch_size, args.p_max_seq_len, 1], [batch_size, args.p_max_seq_len, 1],
            [batch_size, args.p_max_seq_len, 1], [batch_size, args.p_max_seq_len, 1],
            [batch_size, args.p_max_seq_len, 1],
            [batch_size, args.p_max_seq_len, 1], [batch_size, args.p_max_seq_len, 1],
            [batch_size, args.p_max_seq_len, 1], [batch_size, args.p_max_seq_len, 1],
            [batch_size, args.p_max_seq_len, 1],
            [batch_size, 1], [batch_size, 1]],
        dtypes=['int64', 'int64', 'int64', 'int64', 'float32',
                'int64', 'int64', 'int64', 'int64', 'float32',
                'int64', 'int64', 'int64', 'int64', 'float32',
                'int64', 'int64'],
        lod_levels=[0, 0, 0, 0, 0,   0, 0, 0, 0, 0,  0, 0, 0, 0, 0,  0, 0],
        name=task_name + "_" + pyreader_name,
        use_double_buffer=True)

    (src_ids_q, sent_ids_q, pos_ids_q, task_ids_q, input_mask_q,
     src_ids_p_pos, sent_ids_p_pos, pos_ids_p_pos, task_ids_p_pos, input_mask_p_pos,
     src_ids_p_neg, sent_ids_p_neg, pos_ids_p_neg, task_ids_p_neg, input_mask_p_neg,
     labels, qids) = fluid.layers.read_file(pyreader)

    # Query tower.
    ernie_q = ErnieModel(
        src_ids=src_ids_q,
        position_ids=pos_ids_q,
        sentence_ids=sent_ids_q,
        task_ids=task_ids_q,
        input_mask=input_mask_q,
        config=ernie_config,
        model_name='query_')
    # Positive-passage tower (parameters shared with the negative tower via
    # the common 'titlepara_' model_name).
    ernie_pos = ErnieModel(
        src_ids=src_ids_p_pos,
        position_ids=pos_ids_p_pos,
        sentence_ids=sent_ids_p_pos,
        task_ids=task_ids_p_pos,
        input_mask=input_mask_p_pos,
        config=ernie_config,
        model_name='titlepara_')
    # Negative-passage tower.
    ernie_neg = ErnieModel(
        src_ids=src_ids_p_neg,
        position_ids=pos_ids_p_neg,
        sentence_ids=sent_ids_p_neg,
        task_ids=task_ids_p_neg,
        input_mask=input_mask_p_neg,
        config=ernie_config,
        model_name='titlepara_')

    q_cls_feats = ernie_q.get_cls_output()
    pos_cls_feats = ernie_pos.get_cls_output()
    neg_cls_feats = ernie_neg.get_cls_output()

    # Stack pos then neg: rows [0, batch_size) are positives.
    p_cls_feats = fluid.layers.concat([pos_cls_feats, neg_cls_feats], axis=0)

    if is_prediction:
        # Score each query against its paired positive passage only.
        p_cls_feats = fluid.layers.slice(p_cls_feats, axes=[0], starts=[0], ends=[batch_size])
        multi = fluid.layers.elementwise_mul(q_cls_feats, p_cls_feats)
        probs = fluid.layers.reduce_sum(multi, dim=-1)

        graph_vars = {
            "probs": probs,
            "qids": qids,
            "q_rep": q_cls_feats,
            "p_rep": p_cls_feats
        }
        return pyreader, graph_vars

    if args.use_cross_batch and fleet_handle is not None:
        # Cross-batch negatives: gather every worker's passage embeddings so
        # each query is scored against num_workers * 2 * batch_size passages.
        print("worker num is: {}".format(fleet_handle.worker_num()))
        all_p_cls_feats = fluid.layers.collective._c_allgather(
                p_cls_feats, fleet_handle.worker_num(), use_calc_stream=True)

        logits = fluid.layers.matmul(q_cls_feats, all_p_cls_feats, transpose_x=False, transpose_y=True)
        worker_id = fleet_handle.worker_index()

    else:
        logits = fluid.layers.matmul(q_cls_feats, p_cls_feats, transpose_x=False, transpose_y=True)
        worker_id = 0

    probs = logits

    # Row i's positive sits at column worker_id * 2 * batch_size + i in the
    # gathered passage matrix (each worker contributes a 2*batch_size block
    # whose first half is positives).
    all_labels = np.array(range(batch_size * worker_id * 2, batch_size * (worker_id * 2 + 1)), dtype='int64')
    matrix_labels = fluid.layers.assign(all_labels)
    matrix_labels = fluid.layers.unsqueeze(matrix_labels, axes=1)
    matrix_labels.stop_gradient=True

    ce_loss = fluid.layers.softmax_with_cross_entropy(
           logits=logits, label=matrix_labels)
    loss = fluid.layers.mean(x=ce_loss)

    # Pass total=num_seqs so the counter is actually written — consistent
    # with the classification create_model in this file; previously the
    # tensor was exported in graph_vars but never filled.
    num_seqs = fluid.layers.create_tensor(dtype='int64')
    accuracy = fluid.layers.accuracy(
        input=probs, label=matrix_labels, total=num_seqs)

    graph_vars = {
        "loss": loss,
        "probs": probs,
        "accuracy": accuracy,
        "labels": labels,
        "num_seqs": num_seqs,
        "qids": qids,
        "q_rep": q_cls_feats,
        "p_rep": p_cls_feats
    }

    # Recompute checkpoints for gradient checkpointing / recompute.
    cp = []
    cp.extend(ernie_q.checkpoints)
    cp.extend(ernie_pos.checkpoints)
    cp.extend(ernie_neg.checkpoints)
    return pyreader, graph_vars, cp