示例#1
0
def load_model_decode(model_dir, data, name, gpu, seg=True):
    """Load saved model weights, decode the ``name`` split and print metrics.

    Args:
        model_dir: Path of the saved ``state_dict`` file handed to
            ``torch.load``.
        data: Project data/config object. ``data.model_name`` selects the
            architecture; ``data.HP_gpu`` is overwritten with ``gpu``.
        name: Name of the split to decode. It is forwarded to ``evaluate``
            and used as the label in the printed report.
        gpu: Value stored into ``data.HP_gpu``. When falsy, the checkpoint
            is deserialized onto the CPU so that weights saved from a GPU
            run can still be loaded.
        seg: If True, report accuracy, precision, recall and F-value;
            otherwise report accuracy only.

    Returns:
        The prediction results returned by ``evaluate``.

    Raises:
        ValueError: If ``data.model_name`` is not a supported model name.
    """
    data.HP_gpu = gpu
    print("Load Model from file: ", model_dir)

    if data.model_name == 'WC-LSTM_model':
        model = CW_NER(data)
    elif data.model_name == 'CNN_model':
        model = CNNmodel(data)
    elif data.model_name == 'LSTM_model':
        model = BiLSTM_CRF(data)
    else:
        # Raise explicitly instead of asserting: assertions are stripped
        # under ``python -O`` and would let a None model slip through.
        raise ValueError("Unsupported model_name: %r" % (data.model_name,))

    # load_state_dict copies the values into the model's own parameters, so
    # mapping storages to the CPU is only needed to make deserialization
    # succeed when no GPU is in use.
    map_location = None if gpu else 'cpu'
    model.load_state_dict(torch.load(model_dir, map_location=map_location))

    print("Decode %s data ..." % name)
    start_time = time.time()
    speed, acc, p, r, f, pred_results = evaluate(data, model, name)
    time_cost = time.time() - start_time

    # seg: boolean.
    # True for tasks scored with F-value (e.g. word segmentation, NER,
    # chunking); False for tasks scored with token accuracy (e.g. POS, CCG).
    if seg:
        print(
            "%s: time:%.2fs, speed:%.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f"
            % (name, time_cost, speed, acc, p, r, f))
    else:
        print("%s: time:%.2fs, speed:%.2fst/s; acc: %.4f" %
              (name, time_cost, speed, acc))
    return pred_results
示例#2
0
def _train_forward_batch(model, data, instance):
    """Batchify ``instance`` for the configured model and compute its loss.

    Returns:
        A ``(loss, tag_seq, batch_label, mask)`` tuple.

    Raises:
        ValueError: If ``data.model_name`` is not a supported model name.
    """
    if data.model_name == 'WC-LSTM_model':
        (gaz_list, reverse_gaz_list, batch_char, _batch_bichar,
         batch_charlen, _batch_charrecover, batch_label,
         mask) = batchify_with_label_3(instance, data.HP_gpu,
                                       data.HP_num_layer)
        loss, tag_seq = model.neg_log_likelihood_loss(
            gaz_list, reverse_gaz_list, batch_char, batch_charlen,
            batch_label, mask)
    elif data.model_name == 'CNN_model':
        (gaz_list, batch_char, batch_bichar, batch_charlen, batch_label,
         layer_gaz, gaz_mask, mask) = batchify_with_label_2(
             instance, data.HP_gpu, data.HP_num_layer)
        loss, tag_seq = model.neg_log_likelihood_loss(
            gaz_list, batch_char, batch_bichar, batch_charlen,
            layer_gaz, gaz_mask, mask, batch_label)
    elif data.model_name == 'LSTM_model':
        (gaz_list, batch_char, batch_bichar, batch_charlen,
         _batch_wordrecover, batch_label, mask) = batchify_with_label(
             instance, data.HP_gpu, data.HP_num_layer)
        loss, tag_seq = model.neg_log_likelihood_loss(
            gaz_list, batch_char, batch_bichar, batch_charlen,
            batch_label, mask)
    else:
        raise ValueError("Unsupported model_name: %r" % (data.model_name,))
    return loss, tag_seq, batch_label, mask


def train(data, save_model_dir, dset_dir, seg=True):
    """Train the configured model, evaluating on dev and test every epoch.

    For each of ``data.HP_iteration`` epochs the training instances are
    shuffled, processed in slices of ``data.HP_batch_size``, and the model is
    then evaluated on the "dev" and "test" splits. Whenever the dev score
    exceeds the best one seen so far, the model's ``state_dict`` is saved to
    ``<save_model_dir>-<epoch>-<score>.model``.

    Args:
        data: Project data/config object providing the hyper-parameters
            (``HP_*``), ``model_name`` and ``train_Ids``.
        save_model_dir: Prefix of the path for saved model files.
        dset_dir: Destination passed to ``save_data_setting``.
        seg: If True the dev F-value is the model-selection score and
            precision/recall/F are printed; otherwise accuracy is used.

    Raises:
        ValueError: If ``data.model_name`` is not a supported model name.
    """
    print("Training model...")
    data.show_data_summary()
    save_data_setting(data, dset_dir)
    if data.model_name == 'WC-LSTM_model':
        model = CW_NER(data, type=2)
    elif data.model_name == 'CNN_model':
        model = CNNmodel(data)
    elif data.model_name == 'LSTM_model':
        model = BiLSTM_CRF(data)
    else:
        # Raise explicitly instead of asserting: assertions are stripped
        # under ``python -O`` and would let a None model slip through.
        raise ValueError("Unsupported model_name: %r" % (data.model_name,))

    print("finished built model.")
    # Only parameters with requires_grad set are handed to the optimizer;
    # frozen parameters are skipped.
    parameters = [p for p in model.parameters() if p.requires_grad]

    # Optimizer selection (SGD / Adagrad / AdaDelta / RMSprop / Adam).
    # Notes on the alternatives:
    #   SGD updates the parameters from the gradient of each batch as soon as
    #   it is read. It converges quickly, supports online updates and can
    #   escape poor local optima, but it may also get stuck in local optima
    #   or saddle points, and is unstable because the update direction
    #   depends entirely on the current batch.
    #   SGD + momentum keeps part of the previous update direction and uses
    #   the current batch gradient to adjust it, which adds stability,
    #   speeds up learning and helps escape local optima, e.g.
    #   optim.SGD(parameters, lr=data.HP_lr, momentum=data.HP_momentum).
    optimizer = optim.Adagrad(parameters, lr=data.HP_lr)
    best_dev = -1

    # training
    for idx in tqdm(range(data.HP_iteration)):
        epoch_start = time.time()
        print("\nEpoch: %s/%s" % (idx, data.HP_iteration))
        optimizer = lr_decay(optimizer, idx, data.HP_lr_decay, data.HP_lr)
        batch_loss = 0
        has_pending_loss = False
        total_loss = 0
        # NOTE(review): these token tallies are accumulated but not reported
        # anywhere in this function.
        right_token = 0
        whole_token = 0
        random.shuffle(data.train_Ids)
        model.train()
        model.zero_grad()
        batch_size = data.HP_batch_size
        train_num = len(data.train_Ids)
        for start in range(0, train_num, batch_size):
            end = min(start + batch_size, train_num)
            instance = data.train_Ids[start:end]
            if not instance:
                continue
            loss, tag_seq, batch_label, mask = _train_forward_batch(
                model, data, instance)
            right, whole = predict_check(tag_seq, batch_label, mask)
            right_token += right
            whole_token += whole
            total_loss += loss.item()
            batch_loss += loss
            has_pending_loss = True
            # NOTE(review): the update interval is keyed on HP_clip; confirm
            # that this is the intended hyper-parameter.
            if end % data.HP_clip == 0:
                batch_loss.backward()
                optimizer.step()
                model.zero_grad()
                batch_loss = 0
                has_pending_loss = False

        # Apply the losses accumulated since the last update; without this
        # the tail of every epoch would be computed but never trained on.
        if has_pending_loss:
            batch_loss.backward()
            optimizer.step()
            model.zero_grad()
            batch_loss = 0

        epoch_finish = time.time()
        epoch_cost = epoch_finish - epoch_start
        print(
            "Epoch: %s training finished. Time: %.2fs, speed: %.2fst/s,  total loss: %s"
            % (idx, epoch_cost, train_num / epoch_cost, total_loss))

        speed, acc, p, r, f, _ = evaluate(data, model, "dev")
        dev_finish = time.time()
        dev_cost = dev_finish - epoch_finish

        # Model selection uses the F-value for seg-style tasks and the
        # accuracy otherwise.
        current_score = f if seg else acc
        if seg:
            print(
                "Dev: time: %.2fs, speed: %.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f"
                % (dev_cost, speed, acc, p, r, f))
        else:
            print("Dev: time: %.2fs speed: %.2fst/s; acc: %.4f" %
                  (dev_cost, speed, acc))

        if current_score > best_dev:
            if seg:
                print(
                    "Exceed previous best f score: %.4f, \033[35mnew best f: %.4f\033[0m"
                    % (best_dev, current_score))
            else:
                print(
                    "Exceed previous best acc score:%.4f, \033[35mnew best acc: %.4f\033[0m"
                    % (best_dev, current_score))
            save_model_name = save_model_dir + '-' + str(idx) + '-' + str(
                round(current_score * 100, 1)) + ".model"
            torch.save(model.state_dict(), save_model_name)
            best_dev = current_score

        speed, acc, p, r, f, _ = evaluate(data, model, "test")
        test_finish = time.time()
        test_cost = test_finish - dev_finish
        if seg:
            print(
                "Test: time: %.2fs, speed: %.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f"
                % (test_cost, speed, acc, p, r, f))
        else:
            print("Test: time: %.2fs, speed: %.2fst/s; acc: %.4f" %
                  (test_cost, speed, acc))
        gc.collect()