Example #1
def train(model, iterator, optimizer, criterion):
    # one training epoch for a BERT-style tagger; `tokenizer` below is assumed
    # to be a module-level global
    model.train()
    for i, batch in enumerate(iterator):
        words, x, is_heads, tags, y, seqlens = batch
        _y = y # for monitoring
        optimizer.zero_grad()
        logits, y, _ = model(x, y) # logits: (N, T, VOCAB), y: (N, T)

        logits = logits.view(-1, logits.shape[-1]) # (N*T, VOCAB)
        y = y.view(-1)  # (N*T,)

        loss = criterion(logits, y)
        loss.backward()

        optimizer.step()

        if i == 0:
            print("=====sanity check======")
            print("words:", words[0])
            print("x:", x.cpu().numpy()[0][:seqlens[0]])
            print("tokens:", tokenizer.convert_ids_to_tokens(x.cpu().numpy()[0])[:seqlens[0]])
            print("is_heads:", is_heads[0])
            print("y:", _y.cpu().numpy()[0][:seqlens[0]])
            print("tags:", tags[0])
            print("seqlen:", seqlens[0])
            print("=======================")


        if i % 10 == 0:  # monitoring
            print(f"step: {i}, loss: {loss.item()}")
Example #2
def train(model, iterator, optimizer, criterion, sanity_check=False):
    # same loop as Example #1, with wall-clock logging; assumes `import time` at module level
    model.train()
    print("=======================")
    print(f"Training started at {time.asctime()}")
    for i, batch in enumerate(iterator):
        words, x, is_heads, tags, y, seqlens = batch
        _y = y # for monitoring
        optimizer.zero_grad()
        logits, y, _ = model(x, y) # logits: (N, T, VOCAB), y: (N, T)

        logits = logits.view(-1, logits.shape[-1]) # (N*T, VOCAB)
        y = y.view(-1)  # (N*T,)

        loss = criterion(logits, y)
        loss.backward()

        optimizer.step()

        if sanity_check and i == 0:
            print("=====sanity check======")
            print("words:", words[0])
            print("x:", x.cpu().numpy()[0][:seqlens[0]])
            print("tokens:", tokenizer.convert_ids_to_tokens(x.cpu().numpy()[0])[:seqlens[0]])
            print("is_heads:", is_heads[0])
            print("y:", _y.cpu().numpy()[0][:seqlens[0]])
            print("tags:", tags[0])
            print("seqlen:", seqlens[0])
            print("=======================")
            
        if i % 100 == 0:  # monitoring
            print(f"time: {time.asctime()}, step: {i}, loss: {loss.item()}")

    print(f"Finished {i} steps training at {time.asctime(time.localtime(time.time()))}\n")
    return loss.item()
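Because this variant returns the last batch loss, a caller can log it per epoch. A usage sketch; the epoch count and checkpointing policy are illustrative, and the last-batch loss is a noisy signal (a dev-set metric is the usual checkpoint criterion):

import torch

best_loss = float('inf')
for epoch in range(10):                              # illustrative epoch count
    epoch_loss = train(model, train_iter, optimizer, criterion,
                       sanity_check=(epoch == 0))    # print the sanity check only once
    if epoch_loss < best_loss:
        best_loss = epoch_loss
        torch.save(model.state_dict(), "best.pt")    # hypothetical checkpoint path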
Example #3
def train(model, iterator, optimizer, criterion):
    model.train()
    for i, batch in enumerate(iterator):
        tokens_x_2d, entities_x_3d, triggers_y_2d, arguments_y_2d, seqlens_1d, is_heads_2d, words_2d, triggers_2d = batch
        optimizer.zero_grad()
        logits, y, y_hat = model(tokens_x_2d, entities_x_3d, triggers_y_2d)

        logits = logits.view(-1, logits.shape[-1])
        y = y.view(-1)

        loss = criterion(logits, y)
        loss.backward()
        # clip after backward() so gradients exist; before backward() the call is a no-op
        nn.utils.clip_grad_norm_(model.parameters(), 1.0)

        optimizer.step()

        if i == 0:
            print("=====sanity check======")
            print("tokens:", tokenizer.convert_ids_to_tokens(tokens_x_2d[0])[:seqlens_1d[0]])
            print("entities_x_3d:", entities_x_3d[0][:seqlens_1d[0]])
            print("is_heads:", is_heads_2d[0])
            print("triggers:", triggers_2d[0])
            print("triggers_y:", triggers_y_2d[0][:seqlens_1d[0]])
            print('triggers_y_hat:', y_hat.cpu().numpy().tolist()[0][:seqlens_1d[0]])
            print("seqlen:", seqlens_1d[0])
            print("=======================")

        if i % 10 == 0:  # monitoring
            print("step: {}, loss: {}".format(i, loss.item()))
Example #4
def train(model, iterator, optimizer, criterion):
    model.train()
    for i, batch in enumerate(iterator):
        tokens_x_2d, entities_x_3d, postags_x_2d, triggers_y_2d, arguments_2d, seqlens_1d, head_indexes_2d, words_2d, triggers_2d = batch
        optimizer.zero_grad()
        trigger_logits, triggers_y_2d, trigger_hat_2d, argument_hidden, argument_keys = model.module.predict_triggers(
            tokens_x_2d=tokens_x_2d,
            entities_x_3d=entities_x_3d,
            postags_x_2d=postags_x_2d,
            head_indexes_2d=head_indexes_2d,
            triggers_y_2d=triggers_y_2d,
            arguments_2d=arguments_2d)

        trigger_logits = trigger_logits.view(-1, trigger_logits.shape[-1])
        trigger_loss = criterion(trigger_logits, triggers_y_2d.view(-1))

        if len(argument_keys) > 0:
            argument_logits, arguments_y_1d, argument_hat_1d, argument_hat_2d = model.module.predict_arguments(
                argument_hidden, argument_keys, arguments_2d)
            argument_loss = criterion(argument_logits, arguments_y_1d)
            loss = trigger_loss + 2 * argument_loss
            if i == 0:
                print("=====sanity check for arguments======")
                print('arguments_y_1d:', arguments_y_1d)
                print("arguments_2d[0]:", arguments_2d[0]['events'])
                print("argument_hat_2d[0]:", argument_hat_2d[0]['events'])
                print("=======================")
        else:
            loss = trigger_loss

        loss.backward()
        # clip after backward(), as in Example #3
        nn.utils.clip_grad_norm_(model.parameters(), 1.0)

        optimizer.step()

        if i == 0:
            print("=====sanity check======")
            print(
                "tokens_x_2d[0]:",
                tokenizer.convert_ids_to_tokens(
                    tokens_x_2d[0])[:seqlens_1d[0]])
            print("entities_x_3d[0]:", entities_x_3d[0][:seqlens_1d[0]])
            print("postags_x_2d[0]:", postags_x_2d[0][:seqlens_1d[0]])
            print("head_indexes_2d[0]:", head_indexes_2d[0][:seqlens_1d[0]])
            print("triggers_2d[0]:", triggers_2d[0])
            print("triggers_y_2d[0]:",
                  triggers_y_2d.cpu().numpy().tolist()[0][:seqlens_1d[0]])
            print('trigger_hat_2d[0]:',
                  trigger_hat_2d.cpu().numpy().tolist()[0][:seqlens_1d[0]])
            print("seqlens_1d[0]:", seqlens_1d[0])
            print("arguments_2d[0]:", arguments_2d[0])
            print("=======================")

        if i % 10 == 0:  # monitoring
            print("step: {}, loss: {}".format(i, loss.item()))
Example #5
def train(model, iterator, optimizer, criterion):
    model.train()
    # with batch_size 24, one epoch is roughly 600 steps
    for i, batch in enumerate(iterator):
        tokens_x_2d, entities_x_3d, postags_x_2d, triggers_y_2d, arguments_2d, seqlens_1d, head_indexes_2d, words_2d, triggers_2d, \
        pre_sent_tokens_x, next_sent_tokens_x, pre_sent_len, next_sent_len, maxlen = batch


        pre_sent_flags = []
        next_sent_flags = []

        pre_sent_len_mat = []
        next_sent_len_mat = []

        # NOTE: the loop variable must not be `i`, which would shadow the batch
        # index and break the `if i == 0` sanity check below
        for n in pre_sent_len:
            tmp = [[1] * 768] * n + [[0] * 768] * (maxlen - n)
            pre_sent_flags.append(tmp)
            pre_sent_len_mat.append([n] * 768)

        for n in next_sent_len:
            tmp = [[1] * 768] * n + [[0] * 768] * (maxlen - n)
            next_sent_flags.append(tmp)
            next_sent_len_mat.append([n] * 768)

        optimizer.zero_grad()
        trigger_logits, triggers_y_2d, trigger_hat_2d = model.predict_triggers(
            tokens_x_2d=tokens_x_2d,
            entities_x_3d=entities_x_3d,
            postags_x_2d=postags_x_2d,
            head_indexes_2d=head_indexes_2d,
            triggers_y_2d=triggers_y_2d,
            arguments_2d=arguments_2d,
            pre_sent_tokens_x=pre_sent_tokens_x,
            next_sent_tokens_x=next_sent_tokens_x,
            pre_sent_flags=pre_sent_flags,
            next_sent_flags=next_sent_flags,
            pre_sent_len_mat=pre_sent_len_mat,
            next_sent_len_mat=next_sent_len_mat)
        # print("trigger_logits 1 " + "="*100)
        # print(trigger_logits.shape)
        trigger_logits = trigger_logits.view(-1, trigger_logits.shape[-1])

        print("trigger_check " + "=" * 100)
        print(torch.argmax(trigger_logits, 1).to('cpu').numpy().tolist())
        print(triggers_y_2d.view(1, -1).to('cpu').numpy().tolist())
        # print("trigger_logits 2 " + "="*100)
        # print(trigger_logits.shape)
        # print("triggers_y_2d 1 " + "="*100)
        # print(triggers_y_2d.shape)
        # print("triggers_y_2d 2 " + "=" * 100)
        # print(triggers_y_2d.view(-1).shape)
        # 这里计算损失函数很有意思,本来model输出的结果trigger_logits: batch_size * sentence_len * 分类数 ,标准结果是 batch_size * sentence_len; 这里句子中每个词对应一个标签
        # 然后直接将 batch_size和sentence合在一起了,不再区分词是哪个句子中的,直接计算整个batch中词分类的效果
        # CrossEntropyLoss的输入: 预测结果(2维): batch_size * 分类数    标准结果(一维):  batch_size ; 当然具体在这里不是batch_size 而是 batch_size * sentence_len
        trigger_loss = criterion(trigger_logits, triggers_y_2d.view(-1))
        # print("triggers_loss 1 " + "=" * 100)
        # print(trigger_loss.shape)

        loss = trigger_loss

        # gradient clipping (disabled in this variant)
        # nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        loss.backward()

        optimizer.step()

        if i == 0:
            print("=====sanity check======")
            print(
                "tokens_x_2d[0]:",
                tokenizer.convert_ids_to_tokens(
                    tokens_x_2d[0])[:seqlens_1d[0]])
            print("entities_x_3d[0]:", entities_x_3d[0][:seqlens_1d[0]])
            print("postags_x_2d[0]:", postags_x_2d[0][:seqlens_1d[0]])
            print("head_indexes_2d[0]:", head_indexes_2d[0][:seqlens_1d[0]])
            print("triggers_2d[0]:", triggers_2d[0])
            print("triggers_y_2d[0]:",
                  triggers_y_2d.cpu().numpy().tolist()[0][:seqlens_1d[0]])
            print('trigger_hat_2d[0]:',
                  trigger_hat_2d.cpu().numpy().tolist()[0][:seqlens_1d[0]])
            print("seqlens_1d[0]:", seqlens_1d[0])
            print("arguments_2d[0]:", arguments_2d[0])
            print("=======================")

        if i % 10 == 0:  # monitoring
            print("step: {}, loss: {}".format(i, loss.item()))
Example #6
def train(model, iterator, optimizer, criterion):
    model.train()
    decision_criterion = nn.BCEWithLogitsLoss()
    for i, batch in enumerate(iterator):
        tokens_x_2d, entities_x_3d, postags_x_2d, triggers_y_2d, arguments_2d, seqlens_1d, head_indexes_2d, words_2d, triggers_2d = batch
        optimizer.zero_grad()
        trigger_logits, triggers_y_2d, trigger_hat_2d, argument_hidden, argument_keys, trigger_info, auxiliary_feature = model.module.predict_triggers(
            tokens_x_2d=tokens_x_2d,
            entities_x_3d=entities_x_3d,
            postags_x_2d=postags_x_2d,
            head_indexes_2d=head_indexes_2d,
            triggers_y_2d=triggers_y_2d,
            arguments_2d=arguments_2d)

        trigger_logits = trigger_logits.view(-1, trigger_logits.shape[-1])
        trigger_loss = criterion(trigger_logits, triggers_y_2d.view(-1))
        loss = trigger_loss


        for module_arg in ARGUMENTS:  # ARGUMENTS: assumed module-level list of argument-role labels
            if len(argument_keys) > 0:
                argument_logits, arguments_y_1d, argument_hat_1d, argument_hat_2d = model.module.module_predict_arguments(argument_hidden, argument_keys, arguments_2d, module_arg)
                argument_loss = criterion(argument_logits, arguments_y_1d)
                loss += 2 * argument_loss

                # meta classifier
                module_decisions_logit, module_decisions_y, argument_hat_2d = model.module.meta_classifier(argument_keys, arguments_2d, trigger_info, argument_logits, argument_hat_1d, auxiliary_feature, module_arg)
        
                module_decisions_logit = module_decisions_logit.view(-1)
                decision_loss = decision_criterion(module_decisions_logit, module_decisions_y)
                loss += 2 * decision_loss
        if i == 0 and len(argument_keys) > 0:  # these variables only exist when arguments were predicted
            print("=====sanity check for arguments======")
            print('arguments_y_1d:', arguments_y_1d)
            print("arguments_2d[0]:", arguments_2d[0]['events'])
            print("argument_hat_2d[0]:", argument_hat_2d[0]['events'])
            print("module decision y = {}".format(module_decisions_y))
            print("module decision pred = {}".format(torch.round(torch.sigmoid(module_decisions_logit))))
            print("=======================")


        loss.backward()
        # clip after backward(), consistent with the earlier examples
        nn.utils.clip_grad_norm_(model.parameters(), 1.0)

        optimizer.step()

        if i == 0:
            print("=====sanity check======")
            print("tokens_x_2d[0]:", tokenizer.convert_ids_to_tokens(tokens_x_2d[0])[:seqlens_1d[0]])
            print("entities_x_3d[0]:", entities_x_3d[0][:seqlens_1d[0]])
            print("postags_x_2d[0]:", postags_x_2d[0][:seqlens_1d[0]])
            print("head_indexes_2d[0]:", head_indexes_2d[0][:seqlens_1d[0]])
            print("triggers_2d[0]:", triggers_2d[0])
            print("triggers_y_2d[0]:", triggers_y_2d.cpu().numpy().tolist()[0][:seqlens_1d[0]])
            print('trigger_hat_2d[0]:', trigger_hat_2d.cpu().numpy().tolist()[0][:seqlens_1d[0]])
            print("seqlens_1d[0]:", seqlens_1d[0])
            print("arguments_2d[0]:", arguments_2d[0])
            print("=======================")

        if i % 100 == 0:  # monitoring
            print("step: {}, loss: {}".format(i, loss.item()))