Example #1
def create_model_for_multi_task(args, vocab_size, is_prediction=False):
    # Handle the vocabulary size
    if args['vocab_size'] > 0:
        vocab_size = args['vocab_size']

    # Input definitions
    qas_ids = fluid.data(name='qas_ids', dtype='int64', shape=[-1, 1])
    src_ids = fluid.data(name='src_ids',
                         dtype='int64',
                         shape=[-1, args['max_seq_length'], 1])
    pos_ids = fluid.data(name='pos_ids',
                         dtype='int64',
                         shape=[-1, args['max_seq_length'], 1])
    sent_ids = fluid.data(name='sent_ids',
                          dtype='int64',
                          shape=[-1, args['max_seq_length'], 1])
    input_mask = fluid.data(name='input_mask',
                            dtype='float32',
                            shape=[-1, args['max_seq_length'], 1])
    labels = fluid.data(name='labels', dtype='int64', shape=[-1, 1])
    labels_for_reverse = fluid.data(name='labels_for_reverse',
                                    dtype='int64',
                                    shape=[-1, 1])
    # Adjust the required inputs according to the task; prediction lacks the label fields that training needs
    if is_prediction:
        feed_list = [qas_ids, src_ids, pos_ids, sent_ids, input_mask]
    else:
        feed_list = [
            qas_ids, src_ids, pos_ids, sent_ids, input_mask, labels,
            labels_for_reverse
        ]
    reader = fluid.io.DataLoader.from_generator(feed_list=feed_list,
                                                capacity=64,
                                                iterable=True)

    # Model part
    # BERT followed by a fully connected layer performs the prediction task

    # BERT part
    config = args
    config['vocab_size'] = vocab_size
    bert = BertModel(src_ids=src_ids,
                     position_ids=pos_ids,
                     sentence_ids=sent_ids,
                     input_mask=input_mask,
                     config=config,
                     use_fp16=False,
                     is_prediction=is_prediction)

    mrc_layer = config['mrc_layer']
    freeze_pretrained_model = config['freeze_pretrained_model']

    cls_feats, reverse_feats = bert.get_pooled_outputs()
    bert_encode = bert.get_sequence_output()
    if freeze_pretrained_model:
        cls_feats.stop_gradient = True
        bert_encode.stop_gradient = True

    logits = None
    if mrc_layer == "cls_fc":
        # Take the [CLS] output and run it through a fully connected layer for prediction
        cls_feats = fluid.layers.dropout(
            x=cls_feats,
            dropout_prob=0.1,
            dropout_implementation="upscale_in_train",
            is_test=is_prediction)
        logits = fluid.layers.fc(
            input=cls_feats,
            size=args['num_labels'],
            param_attr=fluid.ParamAttr(
                name="cls_out_w",
                initializer=fluid.initializer.TruncatedNormal(scale=0.02)),
            bias_attr=fluid.ParamAttr(
                name="cls_out_b", initializer=fluid.initializer.Constant(0.)))
        logits_for_reverse = fluid.layers.fc(
            input=reverse_feats,
            size=2,
            param_attr=fluid.ParamAttr(
                name="reverse_out_w",
                initializer=fluid.initializer.TruncatedNormal(scale=0.02)),
            bias_attr=fluid.ParamAttr(
                name="reverse_out_b",
                initializer=fluid.initializer.Constant(0.)))
    elif mrc_layer == "capsNet":
        # Take the full bert_output and feed it into the capsule network
        bert_output = bert_encode
        param_attr = fluid.ParamAttr(
            name='conv2d.weight',
            initializer=fluid.initializer.Xavier(uniform=False),
            learning_rate=0.001)
        bert_output = fluid.layers.unsqueeze(input=bert_output, axes=[1])
        capsules = fluid.layers.conv2d(input=bert_output,
                                       num_filters=256,
                                       filter_size=32,
                                       stride=15,
                                       padding="VALID",
                                       act="relu",
                                       param_attr=param_attr)
        # (batch_size, 256, 33, 50)
        primaryCaps = CapsLayer(num_outputs=32,
                                vec_len=8,
                                with_routing=False,
                                layer_type='CONV')
        caps1 = primaryCaps(capsules, kernel_size=9, stride=2)
        # (batch_size, 8736, 8, 1)
        classifierCaps = CapsLayer(num_outputs=args['num_labels'],
                                   vec_len=16,
                                   with_routing=True,
                                   layer_type='FC')
        caps2 = classifierCaps(caps1)
        # (batch_size, 3, 16, 1)

        epsilon = 1e-9
        v_length = fluid.layers.sqrt(
            fluid.layers.reduce_sum(
                fluid.layers.square(caps2), -2, keep_dim=True) + epsilon)
        logits = fluid.layers.squeeze(v_length, axes=[2, 3])

    elif mrc_layer == "lstm":
        hidden_size = 128

        cell = fluid.layers.LSTMCell(hidden_size=hidden_size)
        cell_r = fluid.layers.LSTMCell(hidden_size=hidden_size)
        encoded = bert_encode[:, 1:, :]
        encoded = fluid.layers.dropout(
            x=encoded,
            dropout_prob=0.1,
            dropout_implementation="upscale_in_train")
        outputs = fluid.layers.rnn(cell, encoded)[0][:, -1, :]
        outputs_r = fluid.layers.rnn(cell_r, encoded,
                                     is_reverse=True)[0][:, -1, :]
        outputs = fluid.layers.concat(input=[outputs, outputs_r], axis=1)

        cls_feats = outputs
        cls_feats = fluid.layers.dropout(
            x=cls_feats,
            dropout_prob=0.1,
            dropout_implementation="upscale_in_train")
        # fc = fluid.layers.fc(input=cls_feats, size=hidden_size*2)
        # fc = fluid.layers.dropout(
        #     x=fc,
        #     dropout_prob=0.1,
        #     dropout_implementation="upscale_in_train")
        logits = fluid.layers.fc(
            input=cls_feats,
            size=args['num_labels'],
            param_attr=fluid.ParamAttr(
                name="lstm_fc_w",
                initializer=fluid.initializer.TruncatedNormal(scale=0.02)),
            bias_attr=fluid.ParamAttr(
                name="lstm_fc_b", initializer=fluid.initializer.Constant(0.)))

    # Return different results depending on the task
    # The prediction task returns only the dataloader and the predicted probability of each label
    if is_prediction:
        probs = fluid.layers.softmax(logits)
        return reader, probs, qas_ids

    # The training task computes the loss
    ce_loss, probs = fluid.layers.softmax_with_cross_entropy(
        logits=logits, label=labels, return_softmax=True)
    loss = fluid.layers.mean(x=ce_loss)

    ce_loss_for_reverse, probs_for_reverse = fluid.layers.softmax_with_cross_entropy(
        logits=logits_for_reverse,
        label=labels_for_reverse,
        return_softmax=True)
    loss_for_reverse = fluid.layers.mean(x=ce_loss_for_reverse)

    if args['use_fp16'] and args['loss_scaling'] > 1.0:
        loss *= args['loss_scaling']

    num_seqs = fluid.layers.create_tensor(dtype='int64')
    accuracy = fluid.layers.accuracy(input=probs, label=labels, total=num_seqs)
    accuracy_for_reverse = fluid.layers.accuracy(input=probs_for_reverse,
                                                 label=labels_for_reverse,
                                                 total=num_seqs)

    # Return the dataloader, loss, predictions, and accuracy
    return reader, loss + loss_for_reverse, probs, accuracy, accuracy_for_reverse, qas_ids
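
A minimal usage sketch for the objects returned above, assuming a Paddle 1.x static-graph setup; `args` (a dict of hyperparameters) and `batch_generator` are hypothetical stand-ins that the snippet itself does not define:

import paddle.fluid as fluid

place = fluid.CUDAPlace(0) if fluid.is_compiled_with_cuda() else fluid.CPUPlace()
exe = fluid.Executor(place)

train_prog, startup_prog = fluid.Program(), fluid.Program()
with fluid.program_guard(train_prog, startup_prog):
    # args and vocab_size are placeholder values, not taken from the original project
    reader, loss, probs, acc, acc_rev, qas_ids = create_model_for_multi_task(
        args, vocab_size=21128)
    fluid.optimizer.Adam(learning_rate=5e-5).minimize(loss)

exe.run(startup_prog)
reader.set_batch_generator(batch_generator, places=place)  # batch_generator: hypothetical

for epoch in range(3):
    for feed in reader():  # iterable=True: the loop yields feedable batches
        loss_val, = exe.run(train_prog, feed=feed, fetch_list=[loss])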
Example #2
    def __init__(self, args=None, detect_entities=False):
        if args is None:
            self.args = load_pickle("args.pkl")
        else:
            self.args = args
        self.cuda = torch.cuda.is_available()
        self.detect_entities = detect_entities

        if self.detect_entities:
            self.nlp = spacy.load("en_core_web_lg")
        else:
            self.nlp = None
        self.entities_of_interest = [
            "PERSON",
            "NORP",
            "FAC",
            "ORG",
            "GPE",
            "LOC",
            "PRODUCT",
            "EVENT",
            "WORK_OF_ART",
            "LAW",
            "LANGUAGE",
            "PER",
        ]

        logger.info("Loading tokenizer and model...")
        from .train_funcs import load_state

        if self.args.model_no == 0:
            from model.bert import BertModel as Model

            model = args.model_size  #'bert-base-uncased'
            model_name = "BERT"
            self.net = Model.from_pretrained(
                model,
                force_download=False,
                model_size=args.model_size,
                task="classification",
                n_classes_=self.args.num_classes,
            )
        elif self.args.model_no == 1:
            from model.albert.albert import AlbertModel as Model

            model = args.model_size  #'albert-base-v2'
            model_name = "BERT"
            self.net = Model.from_pretrained(
                model,
                force_download=False,
                model_size=args.model_size,
                task="classification",
                n_classes_=self.args.num_classes,
            )
        elif self.args.model_no == 2:  # BioBERT
            from model.bert import BertModel, BertConfig

            model = "bert-base-uncased"
            model_name = "BioBERT"
            config = BertConfig.from_pretrained(
                "./additional_models/biobert_v1.1_pubmed/bert_config.json"
            )
            self.net = BertModel.from_pretrained(
                pretrained_model_name_or_path="./additional_models/biobert_v1.1_pubmed/biobert_v1.1_pubmed.bin",
                config=config,
                force_download=False,
                model_size="bert-base-uncased",
                task="classification",
                n_classes_=self.args.num_classes,
            )

        self.tokenizer = load_pickle("%s_tokenizer.pkl" % model_name)
        self.net.resize_token_embeddings(len(self.tokenizer))
        if self.cuda:
            self.net.cuda()
        start_epoch, best_pred, amp_checkpoint = load_state(
            self.net, None, None, self.args, load_best=False
        )
        logger.info("Done!")

        self.e1_id = self.tokenizer.convert_tokens_to_ids("[E1]")
        self.e2_id = self.tokenizer.convert_tokens_to_ids("[E2]")
        self.pad_id = self.tokenizer.pad_token_id
        self.rm = load_pickle("relations.pkl")
Example #3
def create_model(args, vocab_size, is_prediction=False, is_validate=False):
    """
    Build the classification model.
    Called directly by the training and prediction modules.
    Returns the relevant computation results and the corresponding dataloader object.
    :param args: parameters
    :param vocab_size: vocabulary size, used to build the word embedding layer. Note that it is ignored when the vocabulary size is set in args.
    :param is_prediction: whether this is prediction mode, which disables dropout etc.
    :param is_validate: whether this is validation mode; besides disabling dropout, loss and accuracy are also returned, and an error is raised if the input data lacks the corresponding fields.
    :return:
    """
    # Handle the vocabulary size
    if args['vocab_size'] > 0:
        vocab_size = args['vocab_size']

    # Input definitions
    qas_ids = fluid.data(name='qas_ids', dtype='int64', shape=[-1, 1])
    src_ids = fluid.data(name='src_ids',
                         dtype='int64',
                         shape=[-1, args['max_seq_length'], 1])
    pos_ids = fluid.data(name='pos_ids',
                         dtype='int64',
                         shape=[-1, args['max_seq_length'], 1])
    sent_ids = fluid.data(name='sent_ids',
                          dtype='int64',
                          shape=[-1, args['max_seq_length'], 1])
    input_mask = fluid.data(name='input_mask',
                            dtype='float32',
                            shape=[-1, args['max_seq_length'], 1])
    # Adjust the required inputs according to the task; prediction lacks the label fields that training needs
    labels = fluid.data(name='labels', dtype='int64', shape=[-1, 1])
    # engineer_ids = fluid.data(name='engineer_ids', dtype='int64', shape=[-1, args['max_seq_length']+1, 1])
    engineer_ids = fluid.data(name='engineer_ids',
                              dtype='int64',
                              shape=[-1, args['max_seq_length'], 1])

    config = args

    if is_prediction:
        feed_list = [qas_ids, src_ids, pos_ids, sent_ids, input_mask]
    else:
        feed_list = [qas_ids, src_ids, pos_ids, sent_ids, input_mask, labels]
    if config['use_engineer']:
        feed_list.append(engineer_ids)
    reader = fluid.io.DataLoader.from_generator(feed_list=feed_list,
                                                capacity=64,
                                                iterable=True)

    # Model part
    # BERT followed by a fully connected layer performs the prediction task

    # BERT part
    config['vocab_size'] = vocab_size
    bert = BertModel(src_ids=src_ids,
                     position_ids=pos_ids,
                     sentence_ids=sent_ids,
                     input_mask=input_mask,
                     config=config,
                     use_fp16=False,
                     is_prediction=(is_prediction or is_validate))

    mrc_layer = config['mrc_layer']
    freeze_pretrained_model = config['freeze_pretrained_model']

    cls_feats = bert.get_pooled_output()
    bert_encode = bert.get_sequence_output()

    if freeze_pretrained_model:
        cls_feats.stop_gradient = True
        bert_encode.stop_gradient = True

    if config['use_engineer']:
        # entity_sim = engineer_ids[:,-1,:]
        # entity_sim_code = fluid.layers.one_hot(input=entity_sim, depth=2, allow_out_of_range=False)
        # engineer_emb = fluid.layers.embedding(input=engineer_ids[:,:-1,:], size=[32, 8])
        engineer_emb = fluid.layers.embedding(input=engineer_ids, size=[32, 8])
        bert_encode = fluid.layers.concat(input=[bert_encode, engineer_emb],
                                          axis=-1)

    logits = None
    if mrc_layer == "cls_fc":
        # Take the [CLS] output and run it through a fully connected layer for prediction
        cls_feats = fluid.layers.dropout(
            x=cls_feats,
            dropout_prob=0.1,
            is_test=(is_prediction or is_validate),
            dropout_implementation="upscale_in_train")
        logits = fluid.layers.fc(
            input=cls_feats,
            size=args['num_labels'],
            param_attr=fluid.ParamAttr(
                name="cls_out_w",
                initializer=fluid.initializer.TruncatedNormal(scale=0.02)),
            bias_attr=fluid.ParamAttr(
                name="cls_out_b", initializer=fluid.initializer.Constant(0.)))
    elif mrc_layer == "capsNet":
        # Take the full bert_output and feed it into the capsule network
        bert_output = bert_encode
        param_attr = fluid.ParamAttr(
            name='conv2d.weight',
            initializer=fluid.initializer.Xavier(uniform=False),
            learning_rate=0.001)
        bert_output = fluid.layers.unsqueeze(input=bert_output, axes=[1])
        capsules = fluid.layers.conv2d(input=bert_output,
                                       num_filters=256,
                                       filter_size=32,
                                       stride=15,
                                       padding="VALID",
                                       act="relu",
                                       param_attr=param_attr)
        # (batch_size, 256, 33, 50)
        primaryCaps = CapsLayer(num_outputs=32,
                                vec_len=8,
                                with_routing=False,
                                layer_type='CONV')
        caps1 = primaryCaps(capsules, kernel_size=9, stride=2)
        # (batch_size, 8736, 8, 1)
        classifierCaps = CapsLayer(num_outputs=args['num_labels'],
                                   vec_len=16,
                                   with_routing=True,
                                   layer_type='FC')
        caps2 = classifierCaps(caps1)
        # (batch_size, 3, 16, 1)

        epsilon = 1e-9
        v_length = fluid.layers.sqrt(
            fluid.layers.reduce_sum(
                fluid.layers.square(caps2), -2, keep_dim=True) + epsilon)
        logits = fluid.layers.squeeze(v_length, axes=[2, 3])

    elif mrc_layer == "lstm":
        hidden_size = args['lstm_hidden_size']

        cell = fluid.layers.LSTMCell(hidden_size=hidden_size)
        cell_r = fluid.layers.LSTMCell(hidden_size=hidden_size)
        encoded = bert_encode[:, 1:, :]
        encoded = fluid.layers.dropout(
            x=encoded,
            is_test=(is_prediction or is_validate),
            dropout_prob=0.1,
            dropout_implementation="upscale_in_train")
        outputs = fluid.layers.rnn(cell, encoded)[0][:, -1, :]
        outputs_r = fluid.layers.rnn(cell_r, encoded,
                                     is_reverse=True)[0][:, -1, :]
        outputs = fluid.layers.concat(input=[outputs, outputs_r], axis=1)

        cls_feats = outputs
        cls_feats = fluid.layers.dropout(
            x=cls_feats,
            is_test=(is_prediction or is_validate),
            dropout_prob=0.1,
            dropout_implementation="upscale_in_train")
        # fc = fluid.layers.fc(input=cls_feats, size=hidden_size*2)
        # fc = fluid.layers.dropout(
        #     x=fc,
        #     dropout_prob=0.1,
        #     dropout_implementation="upscale_in_train")
        logits = fluid.layers.fc(
            input=cls_feats,
            size=args['num_labels'],
            param_attr=fluid.ParamAttr(
                name="lstm_fc_w",
                initializer=fluid.initializer.TruncatedNormal(scale=0.02)),
            bias_attr=fluid.ParamAttr(
                name="lstm_fc_b", initializer=fluid.initializer.Constant(0.)))

    elif mrc_layer == "highway_lstm":

        hidden_size = 128

        cell = fluid.layers.LSTMCell(hidden_size=hidden_size)
        cell_r = fluid.layers.LSTMCell(hidden_size=hidden_size)
        encoded = bert_encode[:, 1:, :]
        encoded = fluid.layers.dropout(
            x=encoded,
            is_test=(is_prediction or is_validate),
            dropout_prob=0.1,
            dropout_implementation="upscale_in_train")

        encoded = highway_layer(encoded, name="highway1", num_flatten_dims=2)
        encoded = fluid.layers.dropout(
            x=encoded,
            is_test=(is_prediction or is_validate),
            dropout_prob=0.1,
            dropout_implementation="upscale_in_train")

        outputs = fluid.layers.rnn(cell, encoded)[0][:, -1, :]
        outputs_r = fluid.layers.rnn(cell_r, encoded,
                                     is_reverse=True)[0][:, -1, :]
        outputs = fluid.layers.concat(input=[outputs, outputs_r], axis=1)

        cls_feats = outputs
        cls_feats = fluid.layers.dropout(
            x=cls_feats,
            is_test=(is_prediction or is_validate),
            dropout_prob=0.1,
            dropout_implementation="upscale_in_train")
        # fc = fluid.layers.fc(input=cls_feats, size=hidden_size*2)
        # fc = fluid.layers.dropout(
        #     x=fc,
        #     dropout_prob=0.1,
        #     dropout_implementation="upscale_in_train")
        logits = fluid.layers.fc(
            input=cls_feats,
            size=args['num_labels'],
            param_attr=fluid.ParamAttr(
                name="lstm_fc_w",
                initializer=fluid.initializer.TruncatedNormal(scale=0.02)),
            bias_attr=fluid.ParamAttr(
                name="lstm_fc_b", initializer=fluid.initializer.Constant(0.)))

    # Return different results depending on the task
    # The prediction task returns only the dataloader and the predicted probability of each label
    if is_prediction and not is_validate:
        probs = fluid.layers.softmax(logits)
        return reader, probs, qas_ids

    # The training task computes the loss
    ce_loss, probs = fluid.layers.softmax_with_cross_entropy(
        logits=logits, label=labels, return_softmax=True)
    # loss = fluid.layers.mean(x=ce_loss)

    weight = fluid.layers.assign(np.array([[1.], [1.], [1.3]],
                                          dtype='float32'))

    def lossweighed(ce_loss, labels):
        one_hot = fluid.one_hot(input=labels, depth=args["num_labels"])
        lw = fluid.layers.matmul(one_hot, weight)
        lw = fluid.layers.reduce_sum(lw, dim=1)
        loss = fluid.layers.elementwise_mul(lw, ce_loss)
        loss = fluid.layers.mean(loss)
        return loss

    loss = lossweighed(ce_loss, labels)

    if args['use_fp16'] and args['loss_scaling'] > 1.0:
        loss *= args['loss_scaling']

    num_seqs = fluid.layers.create_tensor(dtype='int64')
    accuracy = fluid.layers.accuracy(input=probs, label=labels, total=num_seqs)

    # Return the dataloader, loss, predictions, and accuracy
    return reader, loss, probs, accuracy, qas_ids
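
The lossweighed helper above up-weights the third class (weight 1.3) by multiplying a one-hot encoding of the labels with the weight column vector. A self-contained NumPy illustration of the same arithmetic with toy values:

import numpy as np

weight = np.array([[1.0], [1.0], [1.3]], dtype='float32')   # per-class loss weights
labels = np.array([[0], [2], [1]])                          # toy batch of labels
ce_loss = np.array([0.4, 0.9, 0.2], dtype='float32')        # toy per-sample CE losses

one_hot = np.eye(3, dtype='float32')[labels.reshape(-1)]    # [batch, num_labels]
lw = (one_hot @ weight).reshape(-1)                         # per-sample weight: [1.0, 1.3, 1.0]
weighted_loss = (lw * ce_loss).mean()                       # mean of [0.4, 1.17, 0.2]
print(weighted_loss)                                        # ~0.59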
Example #4
def create_model(pyreader_name,
                 bert_config,
                 max_wn_concept_length,
                 max_nell_concept_length,
                 wn_concept_embedding_mat,
                 nell_concept_embedding_mat,
                 is_training=False,
                 freeze=False):
    if is_training:
        pyreader = fluid.layers.py_reader(
            capacity=50,
            shapes=[[-1, args.max_seq_len, 1], [-1, args.max_seq_len, 1],
                    [-1, args.max_seq_len, 1],
                    [-1, args.max_seq_len, max_wn_concept_length, 1],
                    [-1, args.max_seq_len, max_nell_concept_length, 1],
                    [-1, args.max_seq_len, 1], [-1, 1], [-1, 1]],
            dtypes=[
                'int64', 'int64', 'int64', 'int64', 'int64', 'float32',
                'int64', 'int64'
            ],
            lod_levels=[0, 0, 0, 0, 0, 0, 0, 0],
            name=pyreader_name,
            use_double_buffer=True)
        (src_ids, pos_ids, sent_ids, wn_concept_ids, nell_concept_ids,
         input_mask, start_positions,
         end_positions) = fluid.layers.read_file(pyreader)
    else:
        pyreader = fluid.layers.py_reader(
            capacity=50,
            shapes=[[-1, args.max_seq_len, 1], [-1, args.max_seq_len, 1],
                    [-1, args.max_seq_len, 1],
                    [-1, args.max_seq_len, max_wn_concept_length, 1],
                    [-1, args.max_seq_len, max_nell_concept_length, 1],
                    [-1, args.max_seq_len, 1], [-1, 1]],
            dtypes=[
                'int64', 'int64', 'int64', 'int64', 'int64', 'float32', 'int64'
            ],
            lod_levels=[0, 0, 0, 0, 0, 0, 0],
            name=pyreader_name,
            use_double_buffer=True)
        (src_ids, pos_ids, sent_ids, wn_concept_ids, nell_concept_ids,
         input_mask, unique_id) = fluid.layers.read_file(pyreader)
    '''1st Layer: BERT Layer'''
    bert = BertModel(src_ids=src_ids,
                     position_ids=pos_ids,
                     sentence_ids=sent_ids,
                     input_mask=input_mask,
                     config=bert_config,
                     use_fp16=args.use_fp16)

    enc_out = bert.get_sequence_output()
    if freeze:
        enc_out.stop_gradient = True
    logger.info("enc_out.stop_gradient: {}".format(enc_out.stop_gradient))
    '''2nd layer: Memory Layer'''
    # get memory embedding
    wn_concept_vocab_size = wn_concept_embedding_mat.shape[0]
    wn_concept_dim = wn_concept_embedding_mat.shape[1]
    nell_concept_vocab_size = nell_concept_embedding_mat.shape[0]
    nell_concept_dim = nell_concept_embedding_mat.shape[1]
    wn_memory_embs = fluid.layers.embedding(
        wn_concept_ids,
        size=(wn_concept_vocab_size, wn_concept_dim),
        param_attr=fluid.ParamAttr(name="wn_concept_emb_mat",
                                   do_model_average=False,
                                   trainable=False),
        dtype='float32')
    nell_memory_embs = fluid.layers.embedding(
        nell_concept_ids,
        size=(nell_concept_vocab_size, nell_concept_dim),
        param_attr=fluid.ParamAttr(name="nell_concept_emb_mat",
                                   do_model_average=False,
                                   trainable=False),
        dtype='float32')

    # get memory length
    wn_concept_ids_reduced = fluid.layers.equal(
        wn_concept_ids,
        fluid.layers.fill_constant(
            shape=[1], value=0,
            dtype="int64"))  # [batch_size, sent_size, concept_size, 1]
    wn_concept_ids_reduced = fluid.layers.cast(
        wn_concept_ids_reduced,
        dtype="float32")  # [batch_size, sent_size, concept_size, 1]
    wn_concept_ids_reduced = fluid.layers.scale(fluid.layers.elementwise_sub(
        wn_concept_ids_reduced, fluid.layers.fill_constant([1], "float32", 1)),
                                                scale=-1)
    wn_mem_length = fluid.layers.reduce_sum(
        wn_concept_ids_reduced, dim=2)  # [batch_size, sent_size, 1]

    nell_concept_ids_reduced = fluid.layers.equal(
        nell_concept_ids,
        fluid.layers.fill_constant(
            shape=[1], value=0,
            dtype="int64"))  # [batch_size, sent_size, concept_size, 1]
    nell_concept_ids_reduced = fluid.layers.cast(
        nell_concept_ids_reduced,
        dtype="float32")  # [batch_size, sent_size, concept_size, 1]
    nell_concept_ids_reduced = fluid.layers.scale(fluid.layers.elementwise_sub(
        nell_concept_ids_reduced, fluid.layers.fill_constant([1], "float32",
                                                             1)),
                                                  scale=-1)
    nell_mem_length = fluid.layers.reduce_sum(
        nell_concept_ids_reduced, dim=2)  # [batch_size, sent_size, 1]

    # select and integrate
    wn_memory_layer = MemoryLayer(bert_config,
                                  max_wn_concept_length,
                                  wn_concept_dim,
                                  mem_method='raw',
                                  prefix='wn')
    wn_memory_output = wn_memory_layer.forward(enc_out,
                                               wn_memory_embs,
                                               wn_mem_length,
                                               ignore_no_memory_token=True)

    nell_memory_layer = MemoryLayer(bert_config,
                                    max_nell_concept_length,
                                    nell_concept_dim,
                                    mem_method='raw',
                                    prefix='nell')
    nell_memory_output = nell_memory_layer.forward(enc_out,
                                                   nell_memory_embs,
                                                   nell_mem_length,
                                                   ignore_no_memory_token=True)

    memory_output = fluid.layers.concat(
        [enc_out, wn_memory_output, nell_memory_output], axis=2)
    '''3rd layer: Self-Matching Layer'''
    # calculate input dim for self-matching layer
    memory_output_size = bert_config[
        'hidden_size'] + wn_concept_dim + nell_concept_dim
    logger.info("memory_output_size: {}".format(memory_output_size))

    # do matching
    self_att_layer = TriLinearTwoTimeSelfAttentionLayer(
        memory_output_size,
        dropout_rate=0.0,
        cat_mul=True,
        cat_sub=True,
        cat_twotime=True,
        cat_twotime_mul=False,
        cat_twotime_sub=True)  # [bs, sq, concat_hs]
    att_output = self_att_layer.forward(memory_output,
                                        input_mask)  # [bs, sq, concat_hs]
    '''4th layer: Output Layer'''
    logits = fluid.layers.fc(
        input=att_output,
        size=2,
        num_flatten_dims=2,
        param_attr=fluid.ParamAttr(
            name="cls_squad_out_w",
            initializer=fluid.initializer.NormalInitializer(
                loc=0.0, scale=bert_config['initializer_range'])),
        bias_attr=fluid.ParamAttr(name="cls_squad_out_b",
                                  initializer=fluid.initializer.Constant(0.)))

    logits = fluid.layers.transpose(x=logits, perm=[2, 0, 1])
    start_logits, end_logits = fluid.layers.unstack(x=logits, axis=0)

    batch_ones = fluid.layers.fill_constant_batch_size_like(input=start_logits,
                                                            dtype='int64',
                                                            shape=[1],
                                                            value=1)
    num_seqs = fluid.layers.reduce_sum(input=batch_ones)

    if is_training:

        def compute_loss(logits, positions):
            loss = fluid.layers.softmax_with_cross_entropy(logits=logits,
                                                           label=positions)
            loss = fluid.layers.mean(x=loss)
            return loss

        start_loss = compute_loss(start_logits, start_positions)
        end_loss = compute_loss(end_logits, end_positions)
        total_loss = (start_loss + end_loss) / 2.0
        if args.use_fp16 and args.loss_scaling > 1.0:
            total_loss = total_loss * args.loss_scaling

        return pyreader, total_loss, num_seqs
    else:
        return pyreader, unique_id, start_logits, end_logits, num_seqs
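
The "get memory length" block above counts, for every token, how many concept ids are non-zero: equal(·, 0) flags padding entries, the cast / subtract-1 / scale(-1) chain turns that flag into a 0/1 non-padding indicator, and reduce_sum over the concept axis yields the memory length. The same arithmetic in plain NumPy with toy shapes:

import numpy as np

# toy concept ids of shape [batch=1, sent_size=2, concept_size=4, 1]; 0 means padding
concept_ids = np.array([[[[3], [7], [0], [0]],
                         [[5], [0], [0], [0]]]], dtype='int64')

is_pad = (concept_ids == 0).astype('float32')   # equal + cast
non_pad = -(is_pad - 1.0)                       # elementwise_sub(·, 1) then scale(·, -1) == 1 - is_pad
mem_length = non_pad.sum(axis=2)                # reduce_sum over the concept axis
print(mem_length.squeeze(-1))                   # [[2. 1.]]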
Example #5
    def __init__(self, args=None):
        if args is None:
            self.args = load_pickle("args.pkl")
        else:
            self.args = args
        self.cuda = torch.cuda.is_available()

        if self.args.model_no == 0:
            from model.bert import BertModel as Model
            from model.bert_tokenizer import BertTokenizer as Tokenizer

            model = args.model_size  #'bert-large-uncased' 'bert-base-uncased'
            model_name = "BERT"
            self.net = Model.from_pretrained(
                model,
                force_download=False,
                model_size=args.model_size,
                task="fewrel",
            )
        elif self.args.model_no == 1:
            from model.albert.albert import AlbertModel as Model
            from model.albert.albert_tokenizer import (
                AlbertTokenizer as Tokenizer,
            )

            model = args.model_size  #'albert-base-v2'
            model_name = "BERT"
            self.net = Model.from_pretrained(
                model,
                force_download=False,
                model_size=args.model_size,
                task="fewrel",
            )
        elif self.args.model_no == 2:  # BioBERT
            from model.bert import BertModel, BertConfig
            from model.bert_tokenizer import BertTokenizer as Tokenizer

            model = "bert-base-uncased"
            model_name = "BioBERT"
            config = BertConfig.from_pretrained(
                "./additional_models/biobert_v1.1_pubmed/bert_config.json"
            )
            self.net = BertModel.from_pretrained(
                pretrained_model_name_or_path="./additional_models/biobert_v1.1_pubmed/biobert_v1.1_pubmed.bin",
                config=config,
                force_download=False,
                model_size="bert-base-uncased",
                task="fewrel",
            )

        if os.path.isfile("./data/%s_tokenizer.pkl" % model_name):
            self.tokenizer = load_pickle("%s_tokenizer.pkl" % model_name)
            logger.info("Loaded tokenizer from saved file.")
        else:
            logger.info(
                "Saved tokenizer not found, initializing new tokenizer..."
            )
            if args.model_no == 2:
                self.tokenizer = Tokenizer(
                    vocab_file="./additional_models/biobert_v1.1_pubmed/vocab.txt",
                    do_lower_case=False,
                )
            else:
                self.tokenizer = Tokenizer.from_pretrained(
                    model, do_lower_case=False
                )
            self.tokenizer.add_tokens(
                ["[E1]", "[/E1]", "[E2]", "[/E2]", "[BLANK]"]
            )
            save_as_pickle("%s_tokenizer.pkl" % model_name, self.tokenizer)
            logger.info(
                "Saved %s tokenizer at ./data/%s_tokenizer.pkl"
                % (model_name, model_name)
            )

        self.net.resize_token_embeddings(len(self.tokenizer))
        self.pad_id = self.tokenizer.pad_token_id

        if self.cuda:
            self.net.cuda()

        if self.args.use_pretrained_blanks == 1:
            logger.info(
                "Loading model pre-trained on blanks at ./data/test_checkpoint_%d.pth.tar..."
                % args.model_no
            )
            checkpoint_path = (
                "./data/test_checkpoint_%d.pth.tar" % self.args.model_no
            )
            checkpoint = torch.load(checkpoint_path)
            model_dict = self.net.state_dict()
            pretrained_dict = {
                k: v
                for k, v in checkpoint["state_dict"].items()
                if k in model_dict.keys()
            }
            model_dict.update(pretrained_dict)
            self.net.load_state_dict(pretrained_dict, strict=False)
            del checkpoint, pretrained_dict, model_dict

        logger.info("Loading Fewrel dataloaders...")
        self.train_loader, _, self.train_length, _ = load_dataloaders(args)
Example #6
    def __init__(self,
                 l2renorm,
                 expert_dims,
                 tokenizer,
                 keep_missing_modalities,
                 test_caption_mode,
                 freeze_weights=False,
                 mimic_ce_dims=False,
                 concat_experts=False,
                 concat_mix_experts=False,
                 use_experts='origfeat',
                 txt_inp=None,
                 txt_agg=None,
                 txt_pro=None,
                 txt_wgh=None,
                 vid_inp=None,
                 vid_cont=None,
                 vid_wgh=None,
                 pos_enc=None,
                 out_tok=None,
                 use_mask='nomask',
                 same_dim=512,
                 vid_bert_params=None,
                 txt_bert_params=None,
                 agg_dims=None,
                 device=None,
                 normalize_experts=True):
        super().__init__()

        self.sanity_checks = False
        modalities = list(expert_dims.keys())
        self.expert_dims = expert_dims
        self.modalities = modalities
        logger.debug(self.modalities)
        self.mimic_ce_dims = mimic_ce_dims
        self.concat_experts = concat_experts
        self.concat_mix_experts = concat_mix_experts
        self.test_caption_mode = test_caption_mode
        self.freeze_weights = freeze_weights
        self.use_experts = use_experts
        self.use_mask = use_mask
        self.keep_missing_modalities = keep_missing_modalities
        self.l2renorm = l2renorm
        self.same_dim = same_dim
        self.txt_inp = txt_inp
        self.txt_agg = txt_agg
        self.txt_pro = txt_pro
        self.txt_wgh = txt_wgh
        self.vid_inp = vid_inp
        self.vid_cont = vid_cont
        self.vid_wgh = vid_wgh
        self.pos_enc = pos_enc
        self.out_tok = out_tok
        self.vid_bert_params = vid_bert_params
        self.normalize_experts = normalize_experts
        self.text_modalities = self.modalities.copy()
        self.text_modalities.append('total')

        self.video_dim_reduce = nn.ModuleDict()
        for mod in self.modalities:
            in_dim = expert_dims[mod]['dim']
            if self.vid_inp in ['agg', 'both', 'all', 'temp']:
                self.video_dim_reduce[mod] = ReduceDim(in_dim, same_dim)

        if self.vid_cont == 'coll':
            self.g_reason_1 = nn.Linear(same_dim * 2, same_dim)
            dout_prob = vid_bert_params['hidden_dropout_prob']
            self.coll_g_dropout = nn.Dropout(dout_prob)
            self.g_reason_2 = nn.Linear(same_dim, same_dim)

            self.f_reason_1 = nn.Linear(same_dim, same_dim)
            self.coll_f_dropout = nn.Dropout(dout_prob)
            self.f_reason_2 = nn.Linear(same_dim, same_dim)
            self.f_reason_3 = nn.Linear(same_dim, same_dim)
            self.batch_norm_g1 = nn.BatchNorm1d(same_dim)
            self.batch_norm_g2 = nn.BatchNorm1d(same_dim)

            self.batch_norm_f1 = nn.BatchNorm1d(same_dim)
            self.batch_norm_f2 = nn.BatchNorm1d(same_dim)

            self.video_GU = nn.ModuleDict()
            for mod in self.modalities:
                self.video_GU[mod] = GatedEmbeddingUnitReasoning(same_dim)

        # If Bert architecture is employed for video
        elif self.vid_cont == 'bert':
            vid_bert_config = types.SimpleNamespace(**vid_bert_params)
            self.vid_bert = nn.ModuleDict()
            for mod in self.text_modalities:
                self.vid_bert[mod] = BertModel(vid_bert_config)

        elif self.vid_cont == 'none':
            pass

        if self.txt_agg[:4] in ['bert']:
            z = re.match(r'bert([a-z]{3})(\d*)(\D*)', txt_agg)
            assert z
            state = z.groups()[0]
            freeze_until = z.groups()[1]

            # Post aggregation: Use [CLS] token ("cls") or aggregate all tokens
            # (mxp, mnp)
            if z.groups()[2] and z.groups()[2] != 'cls':
                self.post_agg = z.groups()[2]
            else:
                self.post_agg = 'cls'

            if state in ['ftn', 'frz']:
                # State is finetune or frozen, we use a pretrained bert model
                txt_bert_config = 'bert-base-uncased'

                # Overwrite config
                if txt_bert_params is None:
                    dout_prob = vid_bert_params['hidden_dropout_prob']
                    txt_bert_params = {
                        'hidden_dropout_prob': dout_prob,
                        'attention_probs_dropout_prob': dout_prob,
                    }
                self.txt_bert = TxtBertModel.from_pretrained(
                    txt_bert_config,
                    cache_dir=
                    '/youtu_pedestrian_detection/wenzhewang/mmt_data/cache_dir',
                    **txt_bert_params)

                if state == 'frz':
                    if freeze_until:
                        # Freeze only certain layers
                        freeze_until = int(freeze_until)
                        logger.debug(
                            'Freezing text bert until layer %d excluded',
                            freeze_until)
                        # Freeze net until given layer
                        for name, param in self.txt_bert.named_parameters():
                            module = name.split('.')[0]
                            if name.split('.')[2].isdigit():
                                layer_nb = int(name.split('.')[2])
                            else:
                                continue
                            if module == 'encoder' and layer_nb in range(
                                    freeze_until):
                                param.requires_grad = False
                                logger.debug(name)
                    else:
                        # Freeze the whole model
                        for name, param in self.txt_bert.named_parameters():
                            module = name.split('.')[0]
                            if module == 'encoder':
                                param.requires_grad = False
                else:
                    assert not freeze_until

            if self.txt_inp == 'bertfrz':
                # Freeze model
                for param in self.txt_bert.embeddings.parameters():
                    param.requires_grad = False
            elif self.txt_inp not in ['bertftn']:
                logger.error('Wrong parameter for the text encoder')
            text_dim = self.txt_bert.config.hidden_size
        elif self.txt_agg in ['vlad', 'mxp', 'mnp', 'lstm']:
            # Need to get text embeddings
            if self.txt_inp == 'bertfrz':
                ckpt = '/youtu_pedestrian_detection/wenzhewang/mmt_data/word_embeddings/bert/ckpt_from_huggingface.pth'
                self.word_embeddings = TxtEmbeddings(ckpt=ckpt, freeze=True)
            elif self.txt_inp == 'bertftn':
                ckpt = '/youtu_pedestrian_detection/wenzhewang/mmt_data/word_embeddings/bert/ckpt_from_huggingface.pth'
                self.word_embeddings = TxtEmbeddings(ckpt=ckpt)
            elif self.txt_inp == 'bertscr':
                vocab_size = 28996
                emb_dim = 768
                self.word_embeddings = TxtEmbeddings(vocab_size, emb_dim)
            else:
                self.word_embeddings = tokenizer.we_model
            emb_dim = self.word_embeddings.text_dim

            if self.txt_agg == 'vlad':
                self.text_pooling = NetVLAD(
                    feature_size=emb_dim,
                    cluster_size=28,
                )
                text_dim = self.text_pooling.out_dim
            elif self.txt_agg == 'mxp':
                text_dim = emb_dim
            elif self.txt_agg == 'lstm':
                input_dim = self.word_embeddings.text_dim
                hidden_dim = 512
                layer_dim = 1
                output_dim = hidden_dim
                self.text_pooling = LSTMModel(input_dim, hidden_dim, layer_dim,
                                              output_dim)
                text_dim = output_dim

        self.text_gu = nn.ModuleDict()
        if self.txt_pro == 'gbn':
            for mod in self.text_modalities:
                self.text_gu[mod] = GatedEmbeddingUnit(
                    text_dim,
                    same_dim,
                    max_text_words=30,
                    dim=2,
                    use_bn=True,
                    normalize=self.normalize_experts)
            self.text_gu['align'] = GatedEmbeddingUnit(
                text_dim,
                same_dim,
                max_text_words=30,
                dim=3,
                use_bn=True,
                normalize=self.normalize_experts)
        elif self.txt_pro == 'gem':
            for mod in self.text_modalities:
                self.text_gu[mod] = GatedEmbeddingUnit(
                    text_dim,
                    same_dim,
                    max_text_words=30,
                    dim=2,
                    use_bn=False,
                    normalize=self.normalize_experts)
            self.text_gu['align'] = GatedEmbeddingUnit(
                text_dim,
                same_dim,
                max_text_words=30,
                dim=3,
                use_bn=False,
                normalize=self.normalize_experts)
        elif self.txt_pro == 'lin':
            for mod in self.text_modalities:
                self.text_gu[mod] = ReduceDim(text_dim, same_dim)
            self.text_gu['align'] = ReduceDim(text_dim, same_dim)

        # Weighting of each modality similarity
        if self.txt_wgh == 'emb':
            self.moe_fc_txt = nn.ModuleDict()
            dout_prob = txt_bert_params['hidden_dropout_prob']
            self.moe_txt_dropout = nn.Dropout(dout_prob)
            for mod in self.text_modalities:
                self.moe_fc_txt[mod] = nn.Linear(text_dim, 1)
        if self.vid_wgh == 'emb':
            self.moe_fc_vid = nn.ModuleDict()
            dout_prob = vid_bert_params['hidden_dropout_prob']
            self.moe_vid_dropout = nn.Dropout(dout_prob)
            for mod in self.modalities:
                self.moe_fc_vid[mod] = nn.Linear(self.same_dim, 1)

        self.debug_dataloader = False
        if self.debug_dataloader:
            self.tokenizer = tokenizer
Example #7
    def __init__(self, modalities, expert_dims, same_dim, vid_inp, vid_cont,
                 vid_wgh, vid_bert_params, pos_enc, out_tok,
                 keep_missing_modalities):
        """modalities: all modalities used to form video features
           expert_dims: dict, the feature dimension for each modality
           same_dim: the dimension of the common space
           vid_inp: video
           vid_cont: the model used to embed the features (coll: collaborative gating; bert)
           vid_wgh: the method to compute the weight
           pos_enc: used in vid_cont=bert"""
        super().__init__()

        self.modalities = modalities
        self.expert_dims = expert_dims
        self.same_dim = same_dim
        self.vid_inp = vid_inp
        self.vid_cont = vid_cont
        self.vid_wgh = vid_wgh
        self.vid_bert_params = vid_bert_params
        self.pos_enc = pos_enc
        self.out_tok = out_tok
        self.keep_missing_modalities = keep_missing_modalities

        self.video_dim_reduce = nn.ModuleDict()
        for mod in self.modalities:
            in_dim = expert_dims[mod]['dim']
            if self.vid_inp in ['agg', 'both', 'all', 'temp']:
                self.video_dim_reduce[mod] = ReduceDim(in_dim, same_dim)

        if self.vid_cont == 'coll':
            self.g_reason_1 = nn.Linear(same_dim * 2, same_dim)
            dout_prob = vid_bert_params['hidden_dropout_prob']
            self.coll_g_dropout = nn.Dropout(dout_prob)
            self.g_reason_2 = nn.Linear(same_dim, same_dim)

            self.f_reason_1 = nn.Linear(same_dim, same_dim)
            self.coll_f_dropout = nn.Dropout(dout_prob)
            self.f_reason_2 = nn.Linear(same_dim, same_dim)
            self.f_reason_3 = nn.Linear(same_dim, same_dim)
            self.batch_norm_g1 = nn.BatchNorm1d(same_dim)
            self.batch_norm_g2 = nn.BatchNorm1d(same_dim)

            self.batch_norm_f1 = nn.BatchNorm1d(same_dim)
            self.batch_norm_f2 = nn.BatchNorm1d(same_dim)

            self.video_GU = nn.ModuleDict()
            for mod in self.modalities:
                self.video_GU[mod] = GatedEmbeddingUnitReasoning(same_dim)

        # If Bert architecture is employed for video
        elif self.vid_cont == 'bert':
            vid_bert_config = types.SimpleNamespace(**vid_bert_params)
            self.vid_bert = BertModel(vid_bert_config)

        elif self.vid_cont == 'none':
            pass

        if self.vid_wgh == 'emb':
            self.moe_fc_vid = nn.ModuleDict()
            dout_prob = vid_bert_params['hidden_dropout_prob']
            self.moe_vid_dropout = nn.Dropout(dout_prob)
            for mod in self.modalities:
                self.moe_fc_vid[mod] = nn.Linear(self.same_dim, 1)
Example #8
def create_model(pyreader_name, bert_config, is_training=False):
    if is_training:
        pyreader = fluid.layers.py_reader(
            capacity=50,
            shapes=[[-1, args.max_seq_len, 1], [-1, args.max_seq_len, 1],
                    [-1, args.max_seq_len, 1], [-1, args.max_seq_len, 1],
                    [-1, 1], [-1, 1]],
            dtypes=['int64', 'int64', 'int64', 'float32', 'int64', 'int64'],
            lod_levels=[0, 0, 0, 0, 0, 0],
            name=pyreader_name,
            use_double_buffer=True)
        (src_ids, pos_ids, sent_ids, input_mask, start_positions,
         end_positions) = fluid.layers.read_file(pyreader)
    else:
        pyreader = fluid.layers.py_reader(
            capacity=50,
            shapes=[[-1, args.max_seq_len, 1], [-1, args.max_seq_len, 1],
                    [-1, args.max_seq_len, 1], [-1, args.max_seq_len, 1],
                    [-1, 1]],
            dtypes=['int64', 'int64', 'int64', 'float32', 'int64'],
            lod_levels=[0, 0, 0, 0, 0],
            name=pyreader_name,
            use_double_buffer=True)
        (src_ids, pos_ids, sent_ids, input_mask,
         unique_id) = fluid.layers.read_file(pyreader)

    bert = BertModel(src_ids=src_ids,
                     position_ids=pos_ids,
                     sentence_ids=sent_ids,
                     input_mask=input_mask,
                     config=bert_config,
                     use_fp16=args.use_fp16)

    enc_out = bert.get_sequence_output()

    logits = fluid.layers.fc(
        input=enc_out,
        size=2,
        num_flatten_dims=2,
        param_attr=fluid.ParamAttr(
            name="cls_squad_out_w",
            initializer=fluid.initializer.TruncatedNormal(scale=0.02)),
        bias_attr=fluid.ParamAttr(name="cls_squad_out_b",
                                  initializer=fluid.initializer.Constant(0.)))

    logits = fluid.layers.transpose(x=logits, perm=[2, 0, 1])
    start_logits, end_logits = fluid.layers.unstack(x=logits, axis=0)

    batch_ones = fluid.layers.fill_constant_batch_size_like(input=start_logits,
                                                            dtype='int64',
                                                            shape=[1],
                                                            value=1)
    num_seqs = fluid.layers.reduce_sum(input=batch_ones)

    if is_training:

        def compute_loss(logits, positions):
            loss = fluid.layers.softmax_with_cross_entropy(logits=logits,
                                                           label=positions)
            loss = fluid.layers.mean(x=loss)
            return loss

        start_loss = compute_loss(start_logits, start_positions)
        end_loss = compute_loss(end_logits, end_positions)
        total_loss = (start_loss + end_loss) / 2.0
        if args.use_fp16 and args.loss_scaling > 1.0:
            total_loss = total_loss * args.loss_scaling

        return pyreader, total_loss, num_seqs
    else:
        return pyreader, unique_id, start_logits, end_logits, num_seqs
Example #9
def create_model(bert_config, is_training=False):
    if is_training:
        input_fields = {
            'names': [
                'src_ids', 'pos_ids', 'sent_ids', 'input_mask',
                'start_positions', 'end_positions'
            ],
            'shapes': [[None, None], [None, None], [None, None],
                       [None, None, 1], [None, 1], [None, 1]],
            'dtypes': ['int64', 'int64', 'int64', 'float32', 'int64', 'int64'],
            'lod_levels': [0, 0, 0, 0, 0, 0],
        }
    else:
        input_fields = {
            'names':
            ['src_ids', 'pos_ids', 'sent_ids', 'input_mask', 'unique_id'],
            'shapes': [[None, None], [None, None], [None, None],
                       [None, None, 1], [None, 1]],
            'dtypes': ['int64', 'int64', 'int64', 'float32', 'int64'],
            'lod_levels': [0, 0, 0, 0, 0],
        }

    inputs = [
        fluid.data(name=input_fields['names'][i],
                   shape=input_fields['shapes'][i],
                   dtype=input_fields['dtypes'][i],
                   lod_level=input_fields['lod_levels'][i])
        for i in range(len(input_fields['names']))
    ]

    data_loader = fluid.io.DataLoader.from_generator(feed_list=inputs,
                                                     capacity=50,
                                                     iterable=False)

    if is_training:
        (src_ids, pos_ids, sent_ids, input_mask, start_positions,
         end_positions) = inputs
    else:
        (src_ids, pos_ids, sent_ids, input_mask, unique_id) = inputs

    bert = BertModel(src_ids=src_ids,
                     position_ids=pos_ids,
                     sentence_ids=sent_ids,
                     input_mask=input_mask,
                     config=bert_config,
                     use_fp16=args.use_fp16)

    enc_out = bert.get_sequence_output()

    logits = fluid.layers.fc(
        input=enc_out,
        size=2,
        num_flatten_dims=2,
        param_attr=fluid.ParamAttr(
            name="cls_squad_out_w",
            initializer=fluid.initializer.TruncatedNormal(scale=0.02)),
        bias_attr=fluid.ParamAttr(name="cls_squad_out_b",
                                  initializer=fluid.initializer.Constant(0.)))

    logits = fluid.layers.transpose(x=logits, perm=[2, 0, 1])
    start_logits, end_logits = fluid.layers.unstack(x=logits, axis=0)

    batch_ones = fluid.layers.fill_constant_batch_size_like(input=start_logits,
                                                            dtype='int64',
                                                            shape=[1],
                                                            value=1)
    num_seqs = fluid.layers.reduce_sum(input=batch_ones)

    if is_training:

        def compute_loss(logits, positions):
            loss = fluid.layers.softmax_with_cross_entropy(logits=logits,
                                                           label=positions)
            loss = fluid.layers.mean(x=loss)
            return loss

        start_loss = compute_loss(start_logits, start_positions)
        end_loss = compute_loss(end_logits, end_positions)
        total_loss = (start_loss + end_loss) / 2.0
        return data_loader, total_loss, num_seqs
    else:
        return data_loader, unique_id, start_logits, end_logits, num_seqs
Example #10
def create_model(args, bert_config, num_labels, is_prediction=False):
    input_fields = {
        'names': ['src_ids', 'pos_ids', 'sent_ids', 'input_mask', 'labels'],
        'shapes':
        [[-1, args.max_seq_len, 1], [-1, args.max_seq_len, 1],
         [-1, args.max_seq_len, 1], [-1, args.max_seq_len, 1], [-1, 1]],
        'dtypes': ['int64', 'int64', 'int64', 'float32', 'int64'],
        'lod_levels': [0, 0, 0, 0, 0],
    }

    inputs = [
        fluid.layers.data(
            name=input_fields['names'][i],
            shape=input_fields['shapes'][i],
            dtype=input_fields['dtypes'][i],
            lod_level=input_fields['lod_levels'][i])
        for i in range(len(input_fields['names']))
    ]
    (src_ids, pos_ids, sent_ids, input_mask, labels) = inputs

    pyreader = fluid.io.PyReader(feed_list=inputs, capacity=50, iterable=False)

    bert = BertModel(
        src_ids=src_ids,
        position_ids=pos_ids,
        sentence_ids=sent_ids,
        input_mask=input_mask,
        config=bert_config,
        use_fp16=args.use_fp16)

    cls_feats = bert.get_pooled_output()
    cls_feats = fluid.layers.dropout(
        x=cls_feats,
        dropout_prob=0.1,
        dropout_implementation="upscale_in_train")
    logits = fluid.layers.fc(
        input=cls_feats,
        size=num_labels,
        param_attr=fluid.ParamAttr(
            name="cls_out_w",
            initializer=fluid.initializer.TruncatedNormal(scale=0.02)),
        bias_attr=fluid.ParamAttr(
            name="cls_out_b", initializer=fluid.initializer.Constant(0.)))

    if is_prediction:
        probs = fluid.layers.softmax(logits)
        feed_targets_name = [
            src_ids.name, pos_ids.name, sent_ids.name, input_mask.name
        ]
        return pyreader, probs, feed_targets_name

    ce_loss, probs = fluid.layers.softmax_with_cross_entropy(
        logits=logits, label=labels, return_softmax=True)
    loss = fluid.layers.mean(x=ce_loss)

    if args.use_fp16 and args.loss_scaling > 1.0:
        loss *= args.loss_scaling

    num_seqs = fluid.layers.create_tensor(dtype='int64')
    accuracy = fluid.layers.accuracy(input=probs, label=labels, total=num_seqs)

    return pyreader, loss, probs, accuracy, num_seqs
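
A minimal sketch of how the non-iterable PyReader returned above is typically driven, assuming hypothetical `args`, `bert_config`, and `sample_list_generator` objects that the snippet itself does not define:

import paddle.fluid as fluid

place = fluid.CPUPlace()
exe = fluid.Executor(place)

train_prog, startup_prog = fluid.Program(), fluid.Program()
with fluid.program_guard(train_prog, startup_prog):
    pyreader, loss, probs, accuracy, num_seqs = create_model(
        args, bert_config, num_labels=2)         # args/bert_config: assumptions
    fluid.optimizer.Adam(learning_rate=2e-5).minimize(loss)

exe.run(startup_prog)
pyreader.decorate_sample_list_generator(sample_list_generator)

pyreader.start()
try:
    while True:                                  # non-iterable readers feed batches implicitly
        exe.run(train_prog, fetch_list=[loss, accuracy])
except fluid.core.EOFException:                  # generator exhausted: one pass finished
    pyreader.reset()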
Example #11
def create_model(pyreader_name, bert_config, is_training=False):
    if is_training:
        pyreader = fluid.layers.py_reader(
            capacity=50,
            shapes=[[-1, args.max_seq_len, 1], [-1, args.max_seq_len, 1],
                    [-1, args.max_seq_len, 1], [-1, args.max_seq_len, 1],
                    [-1, 1], [-1, 1], [-1, args.max_seq_len],
                    [-1, args.max_seq_len], [-1, 1], [-1, args.max_seq_len]],
            dtypes=[
                'int64', 'int64', 'int64', 'float32', 'int64', 'int64',
                'float32', 'float32', 'float32', 'float32'
            ],
            lod_levels=[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
            name=pyreader_name,
            use_double_buffer=True)
        (src_ids, pos_ids, sent_ids, input_mask, start_positions,
         end_positions, KD_start_logits, KD_end_logits, la,
         loss_weights) = fluid.layers.read_file(pyreader)
    else:
        pyreader = fluid.layers.py_reader(
            capacity=50,
            shapes=[[-1, args.max_seq_len, 1], [-1, args.max_seq_len, 1],
                    [-1, args.max_seq_len, 1], [-1, args.max_seq_len, 1],
                    [-1, 1]],
            dtypes=['int64', 'int64', 'int64', 'float32', 'int64'],
            lod_levels=[0, 0, 0, 0, 0],
            name=pyreader_name,
            use_double_buffer=True)
        (src_ids, pos_ids, sent_ids, input_mask,
         unique_id) = fluid.layers.read_file(pyreader)

    bert = BertModel(src_ids=src_ids,
                     position_ids=pos_ids,
                     sentence_ids=sent_ids,
                     input_mask=input_mask,
                     config=bert_config,
                     use_fp16=args.use_fp16)

    enc_out = bert.get_sequence_output()

    logits = fluid.layers.fc(
        input=enc_out,
        size=2,
        num_flatten_dims=2,
        param_attr=fluid.ParamAttr(
            name="cls_squad_out_w",
            initializer=fluid.initializer.TruncatedNormal(scale=0.02)),
        bias_attr=fluid.ParamAttr(name="cls_squad_out_b",
                                  initializer=fluid.initializer.Constant(0.)))

    logits = fluid.layers.transpose(x=logits, perm=[2, 0, 1])
    start_logits, end_logits = fluid.layers.unstack(x=logits, axis=0)

    batch_ones = fluid.layers.fill_constant_batch_size_like(input=start_logits,
                                                            dtype='int64',
                                                            shape=[1],
                                                            value=1)
    num_seqs = fluid.layers.reduce_sum(input=batch_ones)

    if is_training:

        def compute_loss(logits, positions, loss_weights):
            logits = fluid.layers.softmax(logits)
            loss = fluid.layers.cross_entropy(
                input=logits, label=positions) * loss_weights[:, 0]
            loss = fluid.layers.mean(x=loss)
            return loss

        # KLloss_start = fluid.layers.kldiv_loss(x=start_logits, target=KD_start_logits, reduction='mean')
        # KLloss_end = fluid.layers.kldiv_loss(x=end_logits, target=KD_end_logits, reduction='mean')
        # KLloss = (KLloss_start + KLloss_end) / 2.0

        # keep only positions whose teacher (KD) logits are below the huge sentinel
        # value; cast to float32 so the mask can multiply the float logit differences
        KD_loss_mask = fluid.layers.cast(KD_start_logits < 999999999, 'float32')

        def diff_loss(batched_a, batched_b, KD_loss_mask, loss_weights):
            diff = batched_a - batched_b
            loss = diff * diff * KD_loss_mask * loss_weights
            loss = fluid.layers.reduce_sum(loss) / fluid.layers.reduce_sum(
                KD_loss_mask)
            return loss

        start_loss = compute_loss(start_logits, start_positions, loss_weights)
        end_loss = compute_loss(end_logits, end_positions, loss_weights)

        KDloss_start = diff_loss(start_logits, KD_start_logits, KD_loss_mask,
                                 loss_weights)
        KDloss_end = diff_loss(end_logits, KD_end_logits, KD_loss_mask,
                               loss_weights)
        KDloss = (KDloss_start + KDloss_end) / 2.0

        total_loss = (1 - la) * (start_loss + end_loss) / 2.0 + la * KDloss
        if args.use_fp16 and args.loss_scaling > 1.0:
            total_loss = total_loss * args.loss_scaling

        return pyreader, total_loss, num_seqs
    else:
        return pyreader, unique_id, start_logits, end_logits, num_seqs
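A sketch of how the py_reader returned by this training graph is typically driven. It assumes create_model as defined above, plus args, bert_config and a batch generator train_batch_generator that yields lists of numpy arrays matching the ten declared fields, in order; all of these names are illustrative:

import paddle.fluid as fluid

train_prog, startup_prog = fluid.Program(), fluid.Program()
with fluid.program_guard(train_prog, startup_prog):
    pyreader, total_loss, num_seqs = create_model(
        pyreader_name="train_reader", bert_config=bert_config, is_training=True)
    # an optimizer would normally be appended to train_prog here (omitted for brevity)

place = fluid.CUDAPlace(0) if args.use_cuda else fluid.CPUPlace()
exe = fluid.Executor(place)
exe.run(startup_prog)

# each generated batch must match the declared shapes and dtypes
pyreader.decorate_tensor_provider(train_batch_generator)
pyreader.start()
try:
    while True:
        loss_val, = exe.run(train_prog, fetch_list=[total_loss.name])
except fluid.core.EOFException:
    pyreader.reset()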
Example #12
def __init__(self, bert_config, num_labels):
    super(Classifier, self).__init__()
    self.bert = BertModel(bert_config)
    self.cls_out = nn.Linear(bert_config['hidden_size'], num_labels)
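The snippet above only shows the constructor of a dygraph classifier. Below is a hedged sketch of how a matching forward pass might look, assuming nn is paddle.nn and that this repo's dygraph BertModel returns a (sequence_output, pooled_output) pair; both points are assumptions and should be adjusted to the actual model:

import paddle.nn as nn

class Classifier(nn.Layer):
    def __init__(self, bert_config, num_labels):
        super(Classifier, self).__init__()
        self.bert = BertModel(bert_config)  # repo-specific dygraph BERT
        self.cls_out = nn.Linear(bert_config['hidden_size'], num_labels)

    def forward(self, src_ids, sent_ids):
        # assumed return order: (sequence_output, pooled_output)
        _, pooled = self.bert(src_ids, sent_ids)
        return self.cls_out(pooled)  # raw logits; softmax/cross-entropy applied outside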
Example #13
def create_model(args,
                 pyreader_name,
                 bert_config,
                 num_labels,
                 is_prediction=False):
    """
    define fine-tuning model
    """
    if args.binary:
        # only the binary-task reader is defined in this snippet; the code below
        # assumes args.binary is true, otherwise `pyreader` would be undefined
        pyreader = fluid.layers.py_reader(
            capacity=50,
            shapes=[[-1, args.max_seq_len, 1], [-1, args.max_seq_len, 1],
                    [-1, args.max_seq_len, 1], [-1, args.max_seq_len, 1],
                    [-1, 1], [-1, 1]],
            dtypes=['int64', 'int64', 'int64', 'float32', 'int64', 'int64'],
            lod_levels=[0, 0, 0, 0, 0, 0],
            name=pyreader_name,
            use_double_buffer=True)

    (src_ids, pos_ids, sent_ids, input_mask, seq_len,
     labels) = fluid.layers.read_file(pyreader)

    bert = BertModel(
        src_ids=src_ids,
        position_ids=pos_ids,
        sentence_ids=sent_ids,
        input_mask=input_mask,
        config=bert_config,
        use_fp16=args.use_fp16)

    if args.sub_model_type == 'raw':
        cls_feats = bert.get_pooled_output()

    elif args.sub_model_type == 'cnn':
        bert_seq_out = bert.get_sequence_output()
        bert_seq_out = fluid.layers.sequence_unpad(bert_seq_out, seq_len)
        cnn_hidden_size = 100
        convs = []
        for h in [3, 4, 5]:
            conv_feats = fluid.layers.sequence_conv(
                input=bert_seq_out, num_filters=cnn_hidden_size, filter_size=h)
            conv_feats = fluid.layers.batch_norm(input=conv_feats, act="relu")
            conv_feats = fluid.layers.sequence_pool(
                input=conv_feats, pool_type='max')
            convs.append(conv_feats)

        cls_feats = fluid.layers.concat(input=convs, axis=1)

    elif args.sub_model_type == 'gru':
        bert_seq_out = bert.get_sequence_output()
        bert_seq_out = fluid.layers.sequence_unpad(bert_seq_out, seq_len)
        gru_hidden_size = 1024
        gru_input = fluid.layers.fc(input=bert_seq_out,
                                    size=gru_hidden_size * 3)
        gru_forward = fluid.layers.dynamic_gru(
            input=gru_input, size=gru_hidden_size, is_reverse=False)
        gru_backward = fluid.layers.dynamic_gru(
            input=gru_input, size=gru_hidden_size, is_reverse=True)
        gru_output = fluid.layers.concat([gru_forward, gru_backward], axis=1)
        cls_feats = fluid.layers.sequence_pool(
            input=gru_output, pool_type='max')

    elif args.sub_model_type == 'ffa':
        bert_seq_out = bert.get_sequence_output()
        attn = fluid.layers.fc(input=bert_seq_out,
                               num_flatten_dims=2,
                               size=1,
                               act='tanh')
        attn = fluid.layers.softmax(attn)
        weighted_input = bert_seq_out * attn
        weighted_input = fluid.layers.sequence_unpad(weighted_input, seq_len)
        cls_feats = fluid.layers.sequence_pool(weighted_input, pool_type='sum')

    else:
        raise NotImplementedError("%s is not implemented!" %
                                  args.sub_model_type)

    cls_feats = fluid.layers.dropout(
        x=cls_feats,
        dropout_prob=0.1,
        dropout_implementation="upscale_in_train")

    logits = fluid.layers.fc(
        input=cls_feats,
        size=num_labels,
        param_attr=fluid.ParamAttr(
            name="cls_out_w",
            initializer=fluid.initializer.TruncatedNormal(scale=0.02)),
        bias_attr=fluid.ParamAttr(
            name="cls_out_b", initializer=fluid.initializer.Constant(0.)))
    probs = fluid.layers.softmax(logits)

    if is_prediction:
        feed_targets_name = [
            src_ids.name, pos_ids.name, sent_ids.name, input_mask.name
        ]
        return pyreader, probs, feed_targets_name

    ce_loss = fluid.layers.softmax_with_cross_entropy(
        logits=logits, label=labels)
    loss = fluid.layers.mean(x=ce_loss)

    if args.use_fp16 and args.loss_scaling > 1.0:
        loss *= args.loss_scaling

    num_seqs = fluid.layers.create_tensor(dtype='int64')
    accuracy = fluid.layers.accuracy(input=probs, label=labels, total=num_seqs)

    return (pyreader, loss, probs, accuracy, labels, num_seqs)
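An evaluation-loop sketch for the graph above: per-batch accuracy is accumulated with a weight of num_seqs and averaged at the end. It assumes a test program test_prog built with this create_model, an executor exe, and a pyreader already decorated with an evaluation data reader (all names illustrative):

import paddle.fluid as fluid

total_correct, total_seqs = 0.0, 0
pyreader.start()
try:
    while True:
        acc_np, n_np = exe.run(test_prog,
                               fetch_list=[accuracy.name, num_seqs.name])
        total_correct += float(acc_np) * int(n_np)  # batch accuracy * batch size
        total_seqs += int(n_np)
except fluid.core.EOFException:
    pyreader.reset()

print("eval accuracy: %.4f" % (total_correct / max(total_seqs, 1)))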
Example #14
def create_model(args,
                 bert_config,
                 num_labels,
                 is_prediction=False,
                 k=0,
                 n=0,
                 q=0,
                 task_name=""):
    input_fields = {
        'names': ['src_ids', 'pos_ids', 'sent_ids', 'input_mask', 'labels'],
        'shapes': [[None, None], [None, None], [None, None], [None, None, 1],
                   [None, 1]],
        'dtypes': ['int64', 'int64', 'int64', 'float32', 'int64'],
        'lod_levels': [0, 0, 0, 0, 0],
    }

    inputs = [
        fluid.data(name=input_fields['names'][i],
                   shape=input_fields['shapes'][i],
                   dtype=input_fields['dtypes'][i],
                   lod_level=input_fields['lod_levels'][i])
        for i in range(len(input_fields['names']))
    ]
    (src_ids, pos_ids, sent_ids, input_mask, labels) = inputs

    data_loader = fluid.io.DataLoader.from_generator(feed_list=inputs,
                                                     capacity=50,
                                                     iterable=True)

    bert = BertModel(src_ids=src_ids,
                     position_ids=pos_ids,
                     sentence_ids=sent_ids,
                     input_mask=input_mask,
                     config=bert_config,
                     use_fp16=args.use_fp16)

    cls_feats = bert.get_pooled_output()
    cls_feats = fluid.layers.dropout(x=cls_feats,
                                     dropout_prob=0.1,
                                     dropout_implementation="upscale_in_train")
    hidden = fluid.layers.fc(
        input=cls_feats,
        num_flatten_dims=2,
        size=num_labels,
        param_attr=fluid.ParamAttr(
            name="cls_out_w",
            initializer=fluid.initializer.TruncatedNormal(scale=0.02)),
        bias_attr=fluid.ParamAttr(name="cls_out_b",
                                  initializer=fluid.initializer.Constant(0.)))

    if is_prediction:
        # the fc output is named `hidden`; `logits` is not defined yet at this point
        probs = fluid.layers.softmax(hidden)
        feed_targets_name = [
            src_ids.name, pos_ids.name, sent_ids.name, input_mask.name
        ]
        return data_loader, probs, feed_targets_name

    #fluid.layers.Print(hidden)
    logits = fluid.layers.softmax(hidden)
    if task_name == "fewshot":
        #fluid.layers.Print(logits)
        #fluid.layers.Print(labels)
        logits = fluid.layers.reshape(hidden, [-1, num_labels], inplace=True)
        logits = fluid.layers.reshape(logits, [-1, q * k, k, n, 2],
                                      inplace=True)
        logits = fluid.layers.reduce_mean(logits, dim=3, keep_dim=False)
        logits = logits[:, :, :, 1]
        logits = fluid.layers.reshape(logits, [-1, q * k, k], inplace=True)
        logits = fluid.layers.reshape(logits, [-1, k], inplace=True)
        ce_loss, probs = fluid.layers.softmax_with_cross_entropy(
            logits=logits, label=labels, return_softmax=True)
        loss = fluid.layers.mean(x=ce_loss)
        num_seqs = fluid.layers.create_tensor(dtype='int64')
        accuracy = fluid.layers.accuracy(input=probs,
                                         label=labels,
                                         k=1,
                                         total=num_seqs)
        return data_loader, loss, probs, accuracy, num_seqs
    elif task_name == "fintune":
        #fluid.layers.Print(labels)
        logits = fluid.layers.reshape(hidden, [-1, num_labels], inplace=True)
        #fluid.layers.Print(logits)
        ce_loss, probs = fluid.layers.softmax_with_cross_entropy(
            logits=logits, label=labels, return_softmax=True)
        #fluid.layers.Print(ce_loss)
        loss = fluid.layers.mean(x=ce_loss)
        num_seqs = fluid.layers.create_tensor(dtype='int64')
        accuracy = fluid.layers.accuracy(input=probs,
                                         label=labels,
                                         k=1,
                                         total=num_seqs)
        return data_loader, loss, probs, accuracy, num_seqs
    else:
        return
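Because the DataLoader above is created with iterable=True, it is driven differently from the py_reader examples: a batch generator is attached and the loader itself is iterated, with each yielded batch passed as the feed. A minimal training sketch for the "fintune" branch, assuming create_model as defined above plus args, bert_config and a batch generator batch_gen matching the five declared inputs (illustrative names, illustrative optimizer):

import paddle.fluid as fluid

train_prog, startup_prog = fluid.Program(), fluid.Program()
with fluid.program_guard(train_prog, startup_prog):
    data_loader, loss, probs, accuracy, num_seqs = create_model(
        args, bert_config, num_labels=2, task_name="fintune")
    fluid.optimizer.Adam(learning_rate=3e-5).minimize(loss)

place = fluid.CUDAPlace(0) if args.use_cuda else fluid.CPUPlace()
exe = fluid.Executor(place)
exe.run(startup_prog)

# each generated batch must match src_ids, pos_ids, sent_ids, input_mask, labels
data_loader.set_batch_generator(batch_gen, places=place)
for feed_data in data_loader():
    loss_val, acc_val = exe.run(train_prog, feed=feed_data,
                                fetch_list=[loss.name, accuracy.name])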