Example #1
def evaluate(data, model, name):
    if name == "train":
        instances = data.train_Ids
    elif name == "dev":
        instances = data.dev_Ids
    elif name == 'test':
        instances = data.test_Ids
    elif name == 'raw':
        instances = data.raw_Ids
    else:
        print("Error: wrong evaluate name,", name)
        exit(1)
    right_token = 0
    whole_token = 0
    pred_results = []
    gold_results = []
    ## set model in eval mode
    model.eval()
    batch_size = 1
    start_time = time.time()
    train_num = len(instances)
    total_batch = train_num // batch_size + 1
    gazes = []
    for batch_id in range(total_batch):
        with torch.no_grad():
            start = batch_id * batch_size
            end = (batch_id + 1) * batch_size
            if end > train_num:
                end = train_num
            instance = instances[start:end]
            if not instance:
                continue
            gaz_list, batch_word, batch_biword, batch_wordlen, batch_label, layer_gaz, gaz_count, gaz_chars, gaz_mask, gazchar_mask, mask, batch_bert, bert_mask = batchify_with_label(
                instance, data.HP_gpu, data.HP_num_layer, True)
            tag_seq, gaz_match = model(gaz_list, batch_word, batch_biword,
                                       batch_wordlen, layer_gaz, gaz_count,
                                       gaz_chars, gaz_mask, gazchar_mask, mask,
                                       batch_bert, bert_mask)

            gaz_list = [
                data.gaz_alphabet.get_instance(id) for batchlist in gaz_match
                if len(batchlist) > 0 for id in batchlist
            ]
            gazes.append(gaz_list)

            if name == "dev":
                pred_label, gold_label = recover_label(tag_seq, batch_label,
                                                       mask,
                                                       data.label_alphabet)
            else:
                pred_label, gold_label = recover_label(tag_seq, batch_label,
                                                       mask,
                                                       data.label_alphabet)
            pred_results += pred_label
            gold_results += gold_label
    decode_time = time.time() - start_time
    speed = len(instances) / decode_time
    acc, p, r, f = get_ner_fmeasure(gold_results, pred_results, data.tagScheme)
    return speed, acc, p, r, f, pred_results, gazes
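A minimal call-site sketch (not part of the original listing; `data` and `model` are assumed to be the prepared data object and trained network from the surrounding project):

# Hypothetical usage of the evaluate() helper above, e.g. after each training epoch.
speed, acc, p, r, f, pred_results, gazes = evaluate(data, model, "dev")
print("dev: speed %.2f sent/s, acc %.4f, p %.4f, r %.4f, f %.4f" % (speed, acc, p, r, f))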
Example #2
def evaluate(data, model, name, nbest=None, label_flag=True):
    ''' Evaluate the model on a data split (train/dev/test), or on raw data without labels.
    '''
    if name == "train":
        instances = data.train_Ids
    elif name == "dev":
        instances = data.dev_Ids
    elif name == "test":
        instances = data.test_Ids
    elif name == "raw":
        instances = data.raw_Ids
    else:
        print("Error: wrong evaluate name,", name)
        exit(1)
    right_token = 0
    whole_token = 0
    nbest_pred_results = []
    pred_scores = []
    pred_results = []
    gold_results = []
    ## set model in eval mode
    model.eval()
    batch_size = data.batch_size
    start_time = time.time()
    train_num = len(instances)
    total_batch = train_num // batch_size + 1
    for batch_id in range(total_batch):
        start = batch_id * batch_size
        end = (batch_id + 1) * batch_size
        if end > train_num:
            end = train_num
        instance = instances[start:end]
        if not instance:
            continue
        batch_word, batch_features, batch_wordlen, batch_wordrecover, batch_char, batch_charlen, batch_charrecover, batch_label, mask = batchify_with_label(
            instance, data.HP_gpu, True, label_flag=label_flag)
        # the original NCRF++ code has an nbest branch here; it is not implemented in this version
        tag_seq = model(batch_word, batch_features, batch_wordlen, batch_char,
                        batch_charlen, batch_charrecover, mask)
        if label_flag:
            pred_label, gold_label = recover_label(tag_seq, batch_label, mask,
                                                   data.label_alphabet,
                                                   batch_wordrecover)
            gold_results += gold_label
        else:
            pred_label = recover_pred_label(tag_seq, mask, data.label_alphabet,
                                            batch_wordrecover)

        pred_results += pred_label

    decode_time = time.time() - start_time
    speed = len(instances) / decode_time
    if label_flag:
        acc, p, r, f = get_ner_fmeasure(gold_results, pred_results,
                                        data.tagScheme)
    else:
        acc, p, r, f = (0, 0, 0, 0)
    return speed, acc, p, r, f, pred_results, pred_scores
Example #3
def evaluate(data, model, name):
    instances = []
    if name == "train":
        instances = data.train_Ids
    elif name == "dev":
        instances = data.dev_Ids
    elif name == 'test':
        instances = data.test_Ids
    else:
        print("Error: wrong evaluate name,", name)
    pred_results = []
    gold_results = []
    # set model in eval mode
    model.eval()
    batch_size = data.HP_batch_size
    start_time = time.time()
    train_num = len(instances)
    total_batch = train_num // batch_size + 1
    for batch_id in range(total_batch):
        start = batch_id * batch_size
        end = (batch_id + 1) * batch_size
        if end > train_num:
            end = train_num
        instance = instances[start:end]
        if not instance:
            continue
        pred_label, gold_label = -1, -1
        if data.model_name == 'WC-LSTM_model':
            gaz_list, reverse_gaz_list, batch_char, batch_bichar, batch_charlen, batch_charrecover, batch_label, mask = batchify_with_label_3(
                instance, data.HP_gpu, data.HP_num_layer)
            tag_seq = model(gaz_list, reverse_gaz_list, batch_char,
                            batch_charlen, mask)
            pred_label, gold_label = recover_label(tag_seq, batch_label, mask,
                                                   data.label_alphabet,
                                                   batch_charrecover)
        elif data.model_name == 'CNN_model':
            gaz_list, batch_char, batch_bichar, batch_charlen, batch_label, layer_gaz, gaz_mask, mask = batchify_with_label_2(
                instance, data.HP_gpu, data.HP_num_layer, True)
            tag_seq = model(gaz_list, batch_char, batch_bichar, batch_charlen,
                            layer_gaz, gaz_mask, mask)

            pred_label, gold_label = recover_label_2(tag_seq, batch_label,
                                                     mask, data.label_alphabet)
        elif data.model_name == 'LSTM_model':
            gaz_list, batch_char, batch_bichar, batch_charlen, batch_charrecover, batch_label, mask = batchify_with_label(
                instance, data.HP_gpu, True)
            tag_seq = model(gaz_list, batch_char, batch_bichar, batch_charlen,
                            mask)
            pred_label, gold_label = recover_label(tag_seq, batch_label, mask,
                                                   data.label_alphabet,
                                                   batch_charrecover)
        pred_results += pred_label
        gold_results += gold_label
    decode_time = time.time() - start_time
    speed = len(instances) / decode_time
    acc, p, r, f = get_ner_fmeasure(gold_results, pred_results, data.tagScheme)
    return speed, acc, p, r, f, pred_results
Example #4
def evaluate(data, model, name, nbest=None):
    if name == "train":
        instances = data.train_Ids
        instances_text = data.train_texts
    elif name == "dev":
        instances = data.dev_Ids
        instances_text = data.dev_texts
    elif name == 'test':
        instances = data.test_Ids
        instances_text = data.test_texts
    elif name == 'raw':
        instances = data.raw_Ids
        instances_text = data.raw_texts
    else:
        print("Error: wrong evaluate name,", name)
        exit(1)
    right_token = 0
    whole_token = 0
    nbest_pred_results = []
    pred_scores = []
    pred_results = []
    gold_results = []
    ## set model in eval mode
    model.eval()
    batch_size = data.HP_batch_size
    start_time = time.time()
    train_num = len(instances)
    total_batch = train_num//batch_size+1
    for batch_id in range(total_batch):
        start = batch_id*batch_size
        end = (batch_id+1)*batch_size
        if end > train_num:
            end = train_num
        instance = instances[start:end]
        instance_text = instances_text[start:end]
        if not instance:
            continue
        batch_word, batch_features, batch_wordlen, batch_wordrecover, batch_char, batch_charlen, batch_charrecover, batch_label, mask, batch_elmo_char = batchify_with_label(instance, instance_text, data.HP_gpu, False, data.sentence_classification)
        if nbest and not data.sentence_classification:
            scores, nbest_tag_seq = model.decode_nbest(batch_word,batch_features, batch_wordlen, batch_char, batch_charlen, batch_charrecover, mask, nbest, batch_elmo_char)
            nbest_pred_result = recover_nbest_label(nbest_tag_seq, mask, data.label_alphabet, batch_wordrecover)
            nbest_pred_results += nbest_pred_result
            pred_scores += scores[batch_wordrecover].cpu().data.numpy().tolist()
            ## select the best sequence to evaluate
            tag_seq = nbest_tag_seq[:,:,0]
        else:
            tag_seq = model(batch_word, batch_features, batch_wordlen, batch_char, batch_charlen, batch_charrecover, mask, batch_elmo_char)
        # print("tag:",tag_seq)
        pred_label, gold_label = recover_label(tag_seq, batch_label, mask, data.label_alphabet, batch_wordrecover, data.sentence_classification)
        pred_results += pred_label
        gold_results += gold_label
    decode_time = time.time() - start_time
    speed = len(instances)/decode_time
    acc, p, r, f = get_ner_fmeasure(gold_results, pred_results, data.tagScheme)
    if nbest and not data.sentence_classification:
        return speed, acc, p, r, f, nbest_pred_results, pred_scores
    return speed, acc, p, r, f, pred_results, pred_scores
Example #5
def evaluate(data, model, name, nbest=None):
    if name == "train":
        instances = data.train_Ids
    elif name == "dev":
        instances = data.dev_Ids
    elif name == 'test':
        instances = data.test_Ids
    elif name == 'raw':
        instances = data.raw_Ids
    else:
        print("Error: wrong evaluate name,", name)
        exit(1)
    right_token = 0
    whole_token = 0
    total_label_loss = 0

    nbest_pred_results = []
    pred_scores = []
    pred_results = []
    gold_results = []
    ## set model in eval mode
    model.eval()
    batch_size = data.HP_batch_size
    start_time = time.time()
    train_num = len(instances)
    total_batch = train_num // batch_size + 1
    for batch_id in range(total_batch):
        start = batch_id * batch_size
        end = (batch_id + 1) * batch_size
        if end > train_num:
            end = train_num
        instance = instances[start:end]
        if not instance:
            continue
        batch_word, batch_features, batch_wordlen, batch_wordrecover, batch_char, batch_charlen, batch_charrecover, batch_label, mask, batch_type, mask_type, type_seq_lengths, word_seq_bert_tensor = batchify_with_label(
            instance, data.HP_gpu, data.label_alphabet_size,
            data.type_alphabet_size - 1)
        label_loss, tag_seq = model(batch_word, batch_features, batch_wordlen,
                                    batch_char, batch_charlen,
                                    batch_charrecover, batch_label, mask,
                                    batch_type, mask_type, type_seq_lengths,
                                    word_seq_bert_tensor)
        total_label_loss += label_loss.item()

        pred_label, gold_label = recover_label(tag_seq, batch_label, mask,
                                               data.label_alphabet,
                                               batch_wordrecover)

        pred_results += pred_label
        gold_results += gold_label
    decode_time = time.time() - start_time
    speed = len(instances) / decode_time
    acc, p, r, f = get_ner_fmeasure(gold_results, pred_results, data.tagScheme)
    if nbest:
        return speed, acc, p, r, f, nbest_pred_results, pred_scores
    return speed, acc, p, r, f, pred_results, pred_scores, total_label_loss / total_batch
Example #6
def evaluate(data, model, name):
    instances = None
    if name == "train":
        instances = data.train_Ids
    elif name == "dev":
        instances = data.dev_Ids
    elif name == 'test':
        instances = data.test_Ids
    elif name == 'raw':
        instances = data.raw_Ids
    else:
        print("Error: wrong evaluate name,", name)
        exit(1)
    start_time = time.time()
    pred_results = []
    gold_results = []
    ## set model in eval mode
    model.eval()
    start = 0
    end = start + data.HP_batch_size
    eval_epochs = []
    while end <= len(instances):
        eval_epochs.append((start, end))
        start = end
        end = end + data.HP_batch_size
    if end > len(instances) > start:
        eval_epochs.append((start, len(instances)))
    for idx, (start, end) in enumerate(eval_epochs):
        instance = instances[start:end]
        batch_word, batch_word_len, word_perm_idx, batch_word_recover, batch_label, mask, input_label_seq_tensor = batchify_with_label(
            instance, data.HP_gpu, data)
        with torch.no_grad():
            tag_seq = model.evaluate(batch_word,
                                     batch_word_len,
                                     mask,
                                     input_label_seq_tensor)

        pred_label, gold_label = recover_label(tag_seq, batch_label, mask,
                                               data, batch_word_recover, data.use_crf)

        pred_results.extend(pred_label)
        gold_results.extend(gold_label)

    decode_time = time.time() - start_time
    report = classification_report(gold_results, pred_results,
                                   target_names=data.label_alphabet.instances)
    f_value = f1_score(gold_results, pred_results, average="macro")
    acc = accuracy_score(gold_results, pred_results)
    ner_acc, ner_p, ner_r, ner_f = get_ner_fmeasure(gold_results,
                                                    pred_results,
                                                    data.label_alphabet,
                                                    data.tagScheme,
                                                    name='ner',
                                                    need_save_matrix=name == 'test')
    speed = len(instances) / decode_time
    return speed, acc, report, f_value, ner_acc, ner_p, ner_r, ner_f
Example #7
def train_crf():
	word2id, id2word = load_data(TOKEN_DATA)
	tag2id, id2tag = load_data(TAG_DATA)
	_, _, train_, x_train, y_train = generate_data(TRAIN_DATA, word2id, tag2id, max_len=hp.max_len)
	_, _, dev_seq_lens, x_dev, y_dev = generate_data(DEV_DATA, word2id, tag2id, max_len=hp.max_len)
	model_file = "logdir/model_crf"
	model = CRF()
	model.fit(x_train, y_train, template_file='model/module/templates.txt', model_file=model_file, max_iter=20)
	pre_seq = model.predict(x_dev, model_file=model_file)
	acc, p, r, f = get_ner_fmeasure(y_dev, pre_seq)
	print('acc:\t{}\tp:\t{}\tr:\t{}\tf:\t{}\n'.format(acc, p, r, f))
Example #8
def evaluate(data, wordseq, model, name, nbest=None):
    if name == "train":
        instances = data.train_Ids
    elif name == "dev":
        instances = data.dev_Ids
    elif name == 'test':
        instances = data.test_Ids
    elif name == 'raw':
        instances = data.raw_Ids
    else:
        print "Error: wrong evaluate name,", name
    right_token = 0
    whole_token = 0
    nbest_pred_results = []
    pred_scores = []
    pred_results = []
    gold_results = []
    ## set model in eval mode
    wordseq.eval()
    model.eval()
    batch_size = data.HP_batch_size
    start_time = time.time()
    train_num = len(instances)
    total_batch = train_num//batch_size+1
    for batch_id in range(total_batch):
        start = batch_id*batch_size
        end = (batch_id+1)*batch_size
        if end > train_num:
            end = train_num
        instance = instances[start:end]
        if not instance:
            continue
        batch_word, batch_features, batch_wordlen, batch_wordrecover, batch_char, batch_charlen, batch_charrecover, batch_label, mask, _ = batchify_with_label(instance, data.HP_gpu, True)
        if nbest:
            hidden = wordseq.forward(batch_word, batch_features, batch_wordlen, batch_char, batch_charlen, batch_charrecover, None, None)
            scores, nbest_tag_seq = model.decode_nbest(hidden, mask, nbest)
            nbest_pred_result = recover_nbest_label(nbest_tag_seq, mask, data.label_alphabet, batch_wordrecover)
            nbest_pred_results += nbest_pred_result
            pred_scores += scores[batch_wordrecover].cpu().data.numpy().tolist()
            ## select the best sequence to evaluate
            tag_seq = nbest_tag_seq[:,:,0]
        else:
            hidden = wordseq.forward(batch_word, batch_features, batch_wordlen, batch_char, batch_charlen,batch_charrecover, None, None)
            tag_seq = model(hidden, mask)
        # print "tag:",tag_seq
        pred_label, gold_label = recover_label(tag_seq, batch_label, mask, data.label_alphabet, batch_wordrecover)
        pred_results += pred_label
        gold_results += gold_label
    decode_time = time.time() - start_time
    speed = len(instances)/decode_time
    acc, p, r, f = get_ner_fmeasure(gold_results, pred_results, data.tagScheme)
    if nbest:
        return speed, acc, p, r, f, nbest_pred_results, pred_scores
    return speed, acc, p, r, f, pred_results, pred_scores
Example #9
def train_hmm():
	word2id, id2word = load_data(TOKEN_DATA)
	tag2id, id2tag = load_data(TAG_DATA)
	_, _, train_, x_train, y_train = generate_data(TRAIN_DATA, word2id, tag2id, max_len=hp.max_len)
	_, _, dev_seq_lens, x_dev, y_dev = generate_data(DEV_DATA, word2id, tag2id, max_len=hp.max_len)
	model_file = "logdir/model_hmm"
	model = HMM()

	model.fit(x_train, y_train, model_file=model_file)
	pre_seq = model.predict(x_dev, model_file=model_file)
	acc, p, r, f = get_ner_fmeasure(y_dev, pre_seq)
	print('acc:\t{}\tp:\t{}\tr:\t{}\tf:\t{}\n'.format(acc, p, r, f))
Example #10
File: main.py  Project: xj361685640/NCRFpp
def evaluate(data, model, name, nbest=None):
    if name == "train":
        instances = data.train_Ids
    elif name == "dev":
        instances = data.dev_Ids
    elif name == 'test':
        instances = data.test_Ids
    elif name == 'raw':
        instances = data.raw_Ids
    else:
        print("Error: wrong evaluate name,", name)
        exit(1)
    right_token = 0
    whole_token = 0
    nbest_pred_results = []
    pred_scores = []
    pred_results = []
    gold_results = []
    ## set model in eval mode
    model.eval()
    batch_size = data.HP_batch_size
    start_time = time.time()
    train_num = len(instances)
    total_batch = train_num//batch_size+1
    for batch_id in range(total_batch):
        start = batch_id*batch_size
        end = (batch_id+1)*batch_size
        if end > train_num:
            end = train_num
        instance = instances[start:end]
        if not instance:
            continue
        batch_word, batch_features, batch_wordlen, batch_wordrecover, batch_char, batch_charlen, batch_charrecover, batch_label, mask  = batchify_with_label(instance, data.HP_gpu, True)
        if nbest:
            scores, nbest_tag_seq = model.decode_nbest(batch_word,batch_features, batch_wordlen, batch_char, batch_charlen, batch_charrecover, mask, nbest)
            nbest_pred_result = recover_nbest_label(nbest_tag_seq, mask, data.label_alphabet, batch_wordrecover)
            nbest_pred_results += nbest_pred_result
            pred_scores += scores[batch_wordrecover].cpu().data.numpy().tolist()
            ## select the best sequence to evaluate
            tag_seq = nbest_tag_seq[:,:,0]
        else:
            tag_seq = model(batch_word, batch_features, batch_wordlen, batch_char, batch_charlen, batch_charrecover, mask)
        # print("tag:",tag_seq)
        pred_label, gold_label = recover_label(tag_seq, batch_label, mask, data.label_alphabet, batch_wordrecover)
        pred_results += pred_label
        gold_results += gold_label
    decode_time = time.time() - start_time
    speed = len(instances)/decode_time
    acc, p, r, f = get_ner_fmeasure(gold_results, pred_results, data.tagScheme)
    if nbest:
        return speed, acc, p, r, f, nbest_pred_results, pred_scores
    return speed, acc, p, r, f, pred_results, pred_scores
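A brief sketch of the nbest path in the NCRFpp example above (hypothetical call site; `data` and `model` are assumed to come from the usual NCRFpp setup):

# Hypothetical: with nbest set, the function returns n-best label sequences and path scores.
speed, acc, p, r, f, nbest_results, nbest_scores = evaluate(data, model, "test", nbest=10)
# nbest_results[i] holds the 10 candidate label sequences for sentence i,
# nbest_scores[i] the corresponding scores.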
Example #11
File: main.py  Project: leileigan/SAN-CWS
def evaluate(data, model, name, external_pos={}):
    if name == "train":
        instances = data.train_Ids
        instance_texts = data.train_texts
    elif name == "dev":
        instances = data.dev_Ids
        instance_texts = data.dev_texts
    elif name == 'test':
        instances = data.test_Ids
        instance_texts = data.test_texts
    elif name == 'raw':
        instances = data.raw_Ids
        instance_texts = data.raw_texts
    else:
        print("Error: wrong evaluate name,", name)
    right_token = 0
    whole_token = 0
    pred_results = []
    gold_results = []
    # set model in eval mode
    model.eval()
    batch_size = 64
    start_time = time.time()
    train_num = len(instances)
    total_batch = train_num // batch_size + 1
    for batch_id in range(total_batch):
        start = batch_id * batch_size
        end = (batch_id + 1) * batch_size
        if end > train_num:
            end = train_num
        instance = instances[start:end]
        instance_text = [sent[0] for sent in instance_texts[start:end]]
        if not instance:
            continue
        batch_word, batch_biword, batch_wordlen, batch_wordrecover, batch_label, mask, rearrange_instance_texts, batch_pos = \
            batchify_with_label(instance_text, instance, data.HP_gpu)
        with torch.no_grad():
            tag_seq = model.forward(rearrange_instance_texts, batch_word,
                                    batch_biword, batch_wordlen, mask,
                                    batch_pos, external_pos)
            pred_label, gold_label = recover_label(tag_seq, batch_label, mask,
                                                   data.label_alphabet,
                                                   batch_wordrecover)
            pred_results += pred_label
            gold_results += gold_label
    decode_time = time.time() - start_time
    speed = len(instances) / decode_time
    acc, p, r, f = get_ner_fmeasure(gold_results, pred_results)
    model.train()
    return speed, acc, p, r, f, pred_results
Example #12
def evaluate(data, model, name, is_ner):
    if name == "train":
        instances = data.train_Ids
    elif name == "dev":
        instances = data.dev_Ids
    elif name == 'test':
        instances = data.test_Ids
    elif name == "train_ner":
        instances = data.train_Ids
    elif name == "dev_ner":
        instances = data.dev_Ids
    elif name == 'test_ner':
        instances = data.test_Ids
    elif name == 'raw':
        instances = data.raw_Ids
    else:
        print("Error: wrong evaluate name,", name)
    right_token = 0
    whole_token = 0
    pred_results = []
    gold_results = []
    ## set model in eval mode
    model.eval()
    batch_size = 1
    start_time = time.time()
    train_num = len(instances)
    total_batch = train_num // batch_size + 1
    for batch_id in range(total_batch):
        start = batch_id * batch_size
        end = (batch_id + 1) * batch_size
        if end > train_num:
            end = train_num
        instance = instances[start:end]
        if not instance:
            continue
        gaz_list, batch_word, batch_biword, batch_wordlen, batch_wordrecover, batch_char, batch_charlen, batch_charrecover, batch_label, mask = batchify_with_label(
            instance, data.HP_gpu, True)
        tag_seq = model(is_ner, gaz_list, batch_word, batch_biword,
                        batch_wordlen, batch_char, batch_charlen,
                        batch_charrecover, mask)
        # print( "tag:",tag_seq)
        pred_label, gold_label = recover_label(tag_seq, batch_label, mask,
                                               data.label_alphabet,
                                               batch_wordrecover)
        pred_results += pred_label
        gold_results += gold_label
    decode_time = time.time() - start_time
    speed = len(instances) / decode_time
    acc, p, r, f = get_ner_fmeasure(gold_results, pred_results, data.tagScheme)
    return speed, acc, p, r, f, pred_results
Example #13
File: main.py  Project: xiandshi/LGN
def evaluate(data, args, model, name):
    if name == "train":
        instances = data.train_Ids
    elif name == "dev":
        instances = data.dev_Ids
    elif name == 'test':
        instances = data.test_Ids
    elif name == 'raw':
        instances = data.raw_Ids
    else:
        print("Error: wrong evaluate name,", name)
        exit(0)

    pred_results = []
    gold_results = []

    # set model in eval mode
    model.eval()
    batch_size = args.batch_size
    start_time = time.time()
    train_num = len(instances)
    total_batch = train_num // batch_size + 1

    for batch_id in range(total_batch):
        start = batch_id * batch_size
        end = (batch_id + 1) * batch_size
        if end > train_num:
            end = train_num
        instance = instances[start:end]
        if not instance:
            continue

        word_list, batch_char, batch_label, mask = batchify_with_label(
            instance, args.use_gpu)
        _, tag_seq = model(word_list, batch_char, mask)

        pred_label, gold_label = recover_label(tag_seq, batch_label, mask,
                                               data.label_alphabet)

        pred_results += pred_label
        gold_results += gold_label

    decode_time = time.time() - start_time
    speed = len(instances) / decode_time

    acc, p, r, f = get_ner_fmeasure(gold_results, pred_results)
    return speed, acc, p, r, f, pred_results
Example #14
def train(network='rnn'):
	word2id, id2word = load_data(TOKEN_DATA)
	tag2id, id2tag = load_data(TAG_DATA)
	x_train, y_train, seq_lens, _, _ = generate_data(TRAIN_DATA, word2id, tag2id, max_len=hp.max_len)
	x_dev, y_dev, dev_seq_lens, _, source_tag = generate_data(DEV_DATA, word2id, tag2id, max_len=hp.max_len)
	vocab_size = len(word2id)
	num_tags = len(tag2id)
	if network == "transformer":
		model = TransformerCRFModel(vocab_size, num_tags, is_training=True)
	elif network == 'rnn':
		model = BiRnnCRF(vocab_size, num_tags)
	elif network == 'cnn':
		model = CnnCRF(vocab_size, num_tags)
	elif network == 'match-pyramid':
		model = CnnCRF(vocab_size, num_tags)
	else:
		return
	sv = tf.train.Supervisor(graph=model.graph, logdir=logdir, save_model_secs=0)
	with sv.managed_session() as sess:
		for epoch in range(1, hp.num_epochs + 1):
			if sv.should_stop():
				break
			train_loss = []
			for x_batch, y_batch, len_batch in batch_data(x_train, y_train, seq_lens, hp.batch_size):
				feed_dict = {model.x: x_batch, model.y: y_batch, model.seq_lens: len_batch}
				loss, _ = sess.run([model.loss, model.train_op], feed_dict=feed_dict)
				train_loss.append(loss)
			
			dev_loss = []
			predict_lists = []
			for x_batch, y_batch, len_batch in batch_data(x_dev, y_dev, dev_seq_lens, hp.batch_size):
				feed_dict = {model.x: x_batch, model.y: y_batch, model.seq_lens: len_batch}
				loss, logits = sess.run([model.loss, model.logits], feed_dict)
				dev_loss.append(loss)
				
				transition = model.transition.eval(session=sess)
				pre_seq = model.predict(logits, transition, len_batch)
				pre_label = recover_label(pre_seq, len_batch, id2tag)
				predict_lists.extend(pre_label)
			train_loss_v = np.round(float(np.mean(train_loss)), 4)
			dev_loss_v = np.round(float(np.mean(dev_loss)), 4)
			print('****************************************************')
			acc, p, r, f = get_ner_fmeasure(source_tag, predict_lists)
			print('epoch:\t{}\ttrain loss:\t{}\tdev loss:\t{}'.format(epoch, train_loss_v, dev_loss_v))
			print('acc:\t{}\tp:\t{}\tr:\t{}\tf:\t{}'.format(acc, p, r, f))
			print('****************************************************\n\n')
Example #15
def evaluate(data, model, args, name):
    if name == "train":
        instances = data.train_ids
        texts = data.train_texts
    elif name == "dev":
        instances = data.dev_ids
        texts = data.dev_texts
    elif name == 'test':
        instances = data.test_ids
        texts = data.test_texts
    else:
        print("Error: wrong evaluate name,", name)
    pred_results = []
    gold_results = []
    model.eval()
    batch_size = args.batch_size
    start_time = time.time()
    train_num = len(instances)
    total_batch = train_num // batch_size + 1
    o_label = data.label_alphabet.get_index("O")
    for batch_id in range(total_batch):
        start = batch_id * batch_size
        end = (batch_id + 1) * batch_size
        if end > train_num:
            end = train_num
        instance = instances[start:end]
        text = texts[start:end]
        if not instance:
            continue
        input_ids, attention_mask, label_seq_tensor, loss_mask, crf_mask, scope = batchify(
            instance, args, o_label)
        tag_seq = model(input_ids, attention_mask, crf_mask, scope)
        pred_label, gold_label = recover_label(tag_seq, label_seq_tensor,
                                               attention_mask,
                                               data.label_alphabet)
        pred_results += pred_label
        gold_results += gold_label
    decode_time = time.time() - start_time
    speed = len(instances) / decode_time
    acc, p, r, f = get_ner_fmeasure(gold_results, pred_results, data.tagscheme)
    return speed, acc, p, r, f, pred_results
Example #16
def evaluate(data, model, name, gpu):
    if name == "dev":
        instances = data.dev_Ids
    elif name == "test":
        instances = data.test_Ids
    else:
        print "Error: wrong evaluate name,", name
    right_token = 0
    whole_token = 0
    pred_results = []
    gold_results = []
    ## set model in eval mode
    model.eval()
    for words, chars, label in instances:
        label = autograd.Variable(torch.LongTensor(label))
        pred_score, tag_seq = model([words,chars], gpu)
        pred_label, gold_label = recover_label(tag_seq, label, data.label_alphabet)
        pred_results.append(pred_label)
        gold_results.append(gold_label)
    acc, p, r, f = get_ner_fmeasure(gold_results, pred_results, data.tagScheme)
    return acc, p, r, f
Example #17
def evaluate(data, model, name):
    if name == "train":
        instances = data.train_Ids
    elif name == "dev":
        instances = data.dev_Ids
    elif name == 'test':
        instances = data.test_Ids
    elif name == 'raw':
        instances = data.raw_Ids
    else:
        print ("Error: wrong evaluate name,", name)
    pred_results = []
    gold_results = []
    ## set model in eval mode
    model.eval()
    batch_size = 10
    start_time = time.time()
    train_num = len(instances)
    total_batch = train_num//batch_size+1
    for batch_id in range(total_batch):
        start = batch_id*batch_size
        end = (batch_id+1)*batch_size
        if end > train_num:
            end = train_num
        instance = instances[start:end]
        if not instance:
            continue
        gaz_list,batch_word, batch_biword, batch_wordlen, batch_wordrecover, batch_char, batch_charlen, batch_charrecover, batch_label, mask  = batchify_with_label(instance, data.HP_gpu, True)
        tag_seq = model(gaz_list,batch_word, batch_biword, batch_wordlen, batch_char, batch_charlen, batch_charrecover, mask)
        # print "tag:",tag_seq
        pred_label, gold_label = recover_label(tag_seq, batch_label, mask, data.label_alphabet, batch_wordrecover)
        pred_results += pred_label
        gold_results += gold_label
    decode_time = time.time() - start_time
    speed = len(instances)/decode_time
    acc, p, r, f = get_ner_fmeasure(gold_results, pred_results, data.tagScheme)
    return speed, acc, p, r, f, pred_results  
Example #18
def evaluate(dataUniqueId, dataIds, dataMask, dataVectors, dataLabels,
             dataKnowledgeVector, model, padding_label, labelRindex):
    ## evaluation function
    pred_results = []
    gold_results = []
    ## set model in eval mode
    model.eval()
    batch_size = 1
    start_time = time.time()

    eval_data = TensorDataset(dataUniqueId, dataIds, dataMask, dataVectors,
                              dataLabels)
    eval_sampler = SequentialSampler(eval_data)
    eval_dataloader = DataLoader(eval_data,
                                 sampler=eval_sampler,
                                 batch_size=batch_size)
    for uniqueIds, input_batch_list, mask, instanceVectors, input_batch_label in eval_dataloader:
        knowledgeExamples = [
            dataKnowledgeVector[int(idx)] for idx in uniqueIds
        ]
        max_entity_num = max(
            [len(xxx) for yyy in knowledgeExamples for xxx in yyy])
        batch_word, batch_knowledge, word_seq_tensor, batch_wordlen, batch_wordrecover, batch_label, mask, knowledge_mask = batchify_with_label(
            instanceVectors, input_batch_list, input_batch_label,
            knowledgeExamples, mask, GPU, padding_label, max_entity_num)
        tag_seq = model.forward(batch_word, batch_knowledge, mask,
                                knowledge_mask, batch_label, batch_wordlen,
                                dynanmic_meta_embedding)
        pred_label, gold_label = recover_label(tag_seq, batch_label, mask,
                                               batch_wordrecover, labelRindex)
        pred_results += pred_label
        gold_results += gold_label

    decode_time = time.time() - start_time
    speed = len(dataIds) / decode_time
    fmeasure, acc = get_ner_fmeasure(gold_results, pred_results)
    return speed, fmeasure, acc
Example #19
def evaluate(data, model, name, nbest=None):

    if name == "train":
        instances_1 = data.source_train_idx
        instances_2 = data.target_train_idx
    elif name == "dev-test":
        instances_1 = data.source_dev_idx
        instances_2 = data.target_dev_idx
    elif name == 'test':
        instances_1 = data.source_test_idx
        instances_2 = data.target_test_idx
    else:
        print("Error: wrong evaluate name,", name)
        exit(1)

    ## set model in eval mode
    model.eval()
    batch_size = data.HP_batch_size
    start_time = time.time()

    nbest_pred_results_1 = []
    pred_scores_1 = []
    pred_results_1 = []
    gold_results_1 = []
    train_num = len(instances_1)
    total_batch = train_num // batch_size + 1
    for batch_id in range(total_batch):
        start = batch_id * batch_size
        end = (batch_id + 1) * batch_size
        if end > train_num:
            end = train_num
        instance = instances_1[start:end]
        if not instance:
            continue
        batch_word, batch_wordlen, batch_wordrecover, batch_char, batch_charlen, batch_charrecover, batch_label, \
        lm_seq_tensor, mask = batchify_with_label(instance, data.HP_gpu, True)

        if nbest:
            scores, nbest_tag_seq = model.decode_nbest('model2', batch_word, batch_wordlen, batch_char,
                                                       batch_charlen, batch_charrecover, mask, nbest)
            nbest_pred_result = recover_nbest_label(nbest_tag_seq, mask, data.source_label_alphabet, batch_wordrecover)
            nbest_pred_results_1 += nbest_pred_result
            pred_scores_1 += scores[batch_wordrecover].cpu().data.numpy().tolist()
            ## select the best sequence to evaluate
            tag_seq_1 = nbest_tag_seq[:, :, 0]
        else:
            tag_seq_1 = model('model2', batch_word, batch_wordlen, batch_char, batch_charlen,
                              batch_charrecover, mask)
        # print("tag:",tag_seq)
        pred_label, gold_label = recover_label(tag_seq_1, batch_label, mask, data.source_label_alphabet, batch_wordrecover)
        pred_results_1 += pred_label
        gold_results_1 += gold_label
    # decode_time = time.time() - start_time
    # speed = len(instances)/decode_time
    acc_1, p_1, r_1, f_1 = get_ner_fmeasure(gold_results_1, pred_results_1, "BMES")

    nbest_pred_results_2 = []
    pred_scores_2 = []
    pred_results_2 = []
    gold_results_2 = []
    train_num = len(instances_2)
    total_batch = train_num // batch_size + 1
    for batch_id in range(total_batch):
        start = batch_id * batch_size
        end = (batch_id + 1) * batch_size
        if end > train_num:
            end = train_num
        instance = instances_2[start:end]
        if not instance:
            continue
        batch_word, batch_wordlen, batch_wordrecover, batch_char, batch_charlen, batch_charrecover, batch_label,\
        lm_seq_tensor, mask = batchify_with_label(instance, data.HP_gpu, True)

        if nbest:
            scores, nbest_tag_seq = model.decode_nbest('model4', batch_word, batch_wordlen, batch_char,
                                                       batch_charlen, batch_charrecover, mask, nbest)
            nbest_pred_result = recover_nbest_label(nbest_tag_seq, mask, data.target_label_alphabet, batch_wordrecover)
            nbest_pred_results_2 += nbest_pred_result
            pred_scores_2 += scores[batch_wordrecover].cpu().data.numpy().tolist()
            ## select the best sequence to evaluate
            tag_seq_2 = nbest_tag_seq[:, :, 0]
        else:
            tag_seq_2 = model('model4', batch_word, batch_wordlen, batch_char, batch_charlen,
                              batch_charrecover, mask)
        # print("tag:",tag_seq)
        pred_label, gold_label = recover_label(tag_seq_2, batch_label, mask, data.target_label_alphabet, batch_wordrecover)
        pred_results_2 += pred_label
        gold_results_2 += gold_label
    # decode_time = time.time() - start_time
    # speed = len(instances)/decode_time
    acc_2, p_2, r_2, f_2 = get_ner_fmeasure(gold_results_2, pred_results_2, "BMES")
    acc = [acc_1, acc_2]
    p = [p_1, p_2]
    r = [r_1, r_2]
    f = [f_1, f_2]
    pred_results = [pred_results_1, pred_results_2]
    pred_scores = [pred_scores_1, pred_scores_2]
    nbest_pred_results = [nbest_pred_results_1, nbest_pred_results_2]
    decode_time = time.time() - start_time
    speed = (len(instances_1) + len(instances_2)) / decode_time
    if nbest:
        return speed, acc, p, r, f, nbest_pred_results, pred_scores

    return speed, acc, p, r, f, pred_results, pred_scores
Example #20
def evaluate(data, model, name, inference, nbest=None):
    
    if name == "train":
        instances = data.train_Ids
    elif name == "dev":
        instances = data.dev_Ids
    elif name == "test":
        instances = data.test_Ids
    elif name == 'raw':
        instances = data.raw_Ids
    else:
        print "Error: wrong evaluate name,", name
    right_token = 0
    whole_token = 0
    nbest_pred_results = []
    pred_scores = []
    pred_results = []
    gold_results = []
    ## set model in eval mode
    model.eval()
    batch_size = 128  # len(instances)  # For comparison against Vinyals et al. (2015)
    start_time = time.time()
    train_num = len(instances)
    total_batch = train_num//batch_size+1
     
    pred_labels = {idtask:[] for idtask in range(data.HP_tasks)}
    gold_labels = {idtask:[] for idtask in range(data.HP_tasks)}
     
    nbest_pred_labels = {idtask:[] for idtask in range(data.HP_tasks)}
    nbest_pred_scores = {idtask:[] for idtask in range(data.HP_tasks)}

    if data.disjoint:
        treebank_indexes = {}
        for idxsample, sample in enumerate(instances):
            if sample[-1] not in treebank_indexes:
                treebank_indexes[sample[-1]] = []   
            treebank_indexes[sample[-1]].append(idxsample)    

    for batch_id in range(total_batch):
        start = batch_id*batch_size
        end = (batch_id+1)*batch_size 
        if end > train_num:
            end = train_num
        instance = instances[start:end]

        if not instance:
            continue
        batch_word, batch_features, batch_wordlen, batch_wordrecover, batch_char, batch_charlen, batch_charrecover, batch_label, mask  = batchify_with_label(instance, data.HP_gpu, inference, True)
        if nbest:
#            scores, nbest_tag_seq = model.decode_nbest(batch_word,batch_features, batch_wordlen, batch_char, batch_charlen, batch_charrecover, mask, nbest)
            scores, nbest_tag_seq = model.decode_nbest(batch_word,batch_features, 
                                                       batch_wordlen, batch_char, 
                                                       batch_charlen, batch_charrecover, mask, 
                                                       inference, nbest)
            
            tag_seq = []
            
            for idtask, task_nbest_tag_seq in enumerate(nbest_tag_seq):   
                nbest_pred_result = recover_nbest_label(task_nbest_tag_seq, mask, data.label_alphabet[idtask], batch_wordrecover)
                nbest_pred_labels[idtask] += nbest_pred_result
                nbest_pred_scores[idtask] += scores[idtask][batch_wordrecover].cpu().data.numpy().tolist()
                tag_seq.append(task_nbest_tag_seq[:,:,0])
            
        else:
            tag_seq = model(batch_word, batch_features, batch_wordlen, batch_char, batch_charlen, batch_charrecover, mask,
                            inference=inference)

        if not inference:
            for idtask, task_tag_seq in enumerate(tag_seq):
                pred_label, gold_label = recover_label(task_tag_seq, batch_label[idtask], mask, data.label_alphabet[idtask], 
                                                       batch_wordrecover, inference=inference)
                
                pred_labels[idtask]+=pred_label
                gold_labels[idtask]+=gold_label
        else:
            
            for idtask, task_tag_seq in enumerate(tag_seq):
                pred_label, _ = recover_label(task_tag_seq, None, mask, data.label_alphabet[idtask], 
                                                       batch_wordrecover, inference=inference)
                pred_labels[idtask]+=pred_label

    decode_time = time.time() - start_time
    speed = len(instances)/decode_time
     
    # Evaluating the different tasks
    tasks_results = []
    range_tasks = data.HP_tasks if not inference else data.HP_main_tasks
    for idtask in range(range_tasks):

        valid_indexes=None
        if not inference:
            
            valid_gold_labels = [g for idx,g in enumerate(gold_labels[idtask])
                                 if not data.disjoint or idx in treebank_indexes[data.inv_dataset_ids[idtask]]]
            valid_pred_labels = [p for idx,p in enumerate(pred_labels[idtask])
                                 if not data.disjoint or idx in treebank_indexes[data.inv_dataset_ids[idtask]]]
            
            valid_indexes = [idx for idx,p in enumerate(pred_labels[idtask])
                             if not data.disjoint or idx in treebank_indexes[data.inv_dataset_ids[idtask]]]
            
            acc, p, r, f = get_ner_fmeasure(valid_gold_labels, valid_pred_labels, data.tagScheme)
        else:
            acc, p, r, f = -1, -1, -1,-1
        
        if nbest:
            raise NotImplementedError
        else:
            tasks_results.append((speed,acc,p,r,f,pred_labels[idtask],nbest_pred_scores[idtask], valid_indexes))

    return tasks_results
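A hedged sketch of consuming the multitask results above (hypothetical call site; the tuple layout follows the tasks_results.append(...) call in the code):

# Hypothetical: unpack the per-task result tuples returned by the multitask evaluate().
tasks_results = evaluate(data, model, "dev", inference=False)
for idtask, (speed, acc, p, r, f, preds, scores, valid_idx) in enumerate(tasks_results):
    print("task %d: acc %.4f  p %.4f  r %.4f  f %.4f" % (idtask, acc, p, r, f))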
Example #21
def evaluate(data, model, name, nbest=None):
    if name == "train":
        instances = data.train_Ids
    elif name == "dev":
        instances = data.dev_Ids
    elif name == 'test':
        instances = data.test_Ids
    elif name == 'raw':
        instances = data.raw_Ids
    else:
        print("Error: wrong evaluate name,", name)
        exit(1)
    right_token = 0
    whole_token = 0
    nbest_pred_results = []
    pred_scores = []
    pred_results = []
    gold_results = []
    ## set model in eval mode
    model.eval()
    batch_size = data.HP_batch_size
    start_time = time.time()
    train_num = len(instances)
    total_batch = train_num // batch_size + 1
    for batch_id in range(total_batch):
        start = batch_id * batch_size
        end = (batch_id + 1) * batch_size
        if end > train_num:
            end = train_num
        instance = instances[start:end]
        if not instance:
            continue
        batch_word, batch_features, batch_wordlen, batch_wordrecover, batch_char, batch_charlen, batch_charrecover, batch_label, mask = batchify_with_label(
            instance, data.HP_gpu, False, data.sentence_classification)
        if nbest and not data.sentence_classification:
            scores, nbest_tag_seq = model.decode_nbest(
                batch_word, batch_features, batch_wordlen, batch_char,
                batch_charlen, batch_charrecover, mask, nbest)
            nbest_pred_result = recover_nbest_label(nbest_tag_seq, mask,
                                                    data.label_alphabet,
                                                    batch_wordrecover)
            nbest_pred_results += nbest_pred_result
            pred_scores += scores[batch_wordrecover].cpu().data.numpy().tolist()
            ## select the best sequence to evaluate
            tag_seq = nbest_tag_seq[:, :, 0]
        else:
            tag_seq = model(batch_word, batch_features, batch_wordlen,
                            batch_char, batch_charlen, batch_charrecover, mask)
        # print("tag:",tag_seq)
        pred_label, gold_label = recover_label(tag_seq, batch_label, mask,
                                               data.label_alphabet,
                                               batch_wordrecover,
                                               data.sentence_classification)
        pred_results += pred_label
        gold_results += gold_label
    decode_time = time.time() - start_time
    speed = len(instances) / decode_time
    internal_acc, internal_p, internal_r, internal_f = get_ner_fmeasure(
        gold_results, pred_results, data.tagScheme)

    # Do a second evaluation using seqeval
    acc, p, r, f = seqeval_score(gold_results, pred_results)

    if acc != internal_acc:
        print(
            f"Accuracies disagree: {acc} (seqeval), {internal_acc} (NCRFpp), delta {internal_acc - acc}"
        )

    if p != internal_p:
        print(
            f"Precisions disagree: {p} (seqeval), {internal_p} (NCRFpp), delta {internal_p - p}"
        )

    if r != internal_r:
        print(
            f"Recalls disagree: {r} (seqeval), {internal_r} (NCRFpp), delta {internal_r - r}"
        )

    if f != internal_f:
        print(
            f"F1s disagree: {f} (seqeval), {internal_f} (NCRFpp), delta {internal_f - f}"
        )

    if nbest and not data.sentence_classification:
        return speed, (acc, p, r,
                       f), (internal_acc, internal_p, internal_r,
                            internal_f), nbest_pred_results, pred_scores
    return speed, (acc, p, r, f), (internal_acc, internal_p, internal_r,
                                   internal_f), pred_results, pred_scores
Example #22
def evaluate(data, model, name, nbest=None):
    if name == "train":
        instances = data.train_Ids
    elif name == "dev":
        instances = data.dev_Ids
    elif name == 'test':
        instances = data.test_Ids
    elif name == 'raw':
        instances = data.raw_Ids
    else:
        print("Error: wrong evaluate name,", name)
        exit(1)
    nbest_pred_results = []
    pred_scores = []
    nb_results = 3
    pred_results = {i: [] for i in range(nb_results)}
    all_sorted_probs = {i: [] for i in range(nb_results)}
    gold_results = []
    model.eval()
    batch_size = 128
    start_time = time.time()
    train_num = len(instances)
    total_batch = train_num // batch_size + 1
    for batch_id in range(total_batch):
        start = batch_id * batch_size
        end = (batch_id + 1) * batch_size
        if end > train_num:
            end = train_num
        instance = instances[start:end]
        if not instance:
            continue
        batch_word, batch_features, batch_wordlen, batch_wordrecover, batch_char, batch_charlen, batch_charrecover, batch_label, mask = batchify_with_label(
            instance, data.HP_gpu, True)
        if nbest:
            scores, nbest_tag_seq = model.decode_nbest(
                batch_word, batch_features, batch_wordlen, batch_char,
                batch_charlen, batch_charrecover, mask, nbest)
            nbest_pred_result = recover_nbest_label(nbest_tag_seq, mask,
                                                    data.label_alphabet,
                                                    batch_wordrecover)
            nbest_pred_results += nbest_pred_result
            pred_scores += scores[batch_wordrecover].cpu().data.numpy().tolist()

        else:
            best_indices, sorted_probs = model(batch_word, batch_features,
                                               batch_wordlen, batch_char,
                                               batch_charlen,
                                               batch_charrecover, mask)
        gold_label = None
        for i, best_index in enumerate(best_indices):
            specific_pred, gold_label = recover_label(best_index, batch_label,
                                                      mask,
                                                      data.label_alphabet,
                                                      batch_wordrecover)
            pred_results[i] += specific_pred
        gold_results += gold_label
        for k, sorted_prob in enumerate(sorted_probs):
            all_sorted_probs[k] += sorted_prob.data.cpu().numpy().tolist()

    decode_time = time.time() - start_time
    speed = len(instances) / decode_time
    acc, p, r, f = get_ner_fmeasure(gold_results, pred_results[0],
                                    data.tagScheme)
    acc_instances = len(instances)
    acc_speed = decode_time
    if nbest:
        return speed, acc, p, r, f, nbest_pred_results, pred_scores
    return speed, acc, p, r, f, pred_results, pred_scores, all_sorted_probs, acc_instances, acc_speed
Example #23
File: main.py  Project: aghie/disco2labels
def evaluate(data, model, name, inference, nbest=None):
    if name == "train":
        instances = data.train_Ids
    elif name == "dev":
        instances = data.dev_Ids
    elif name == "test":
        instances = data.test_Ids
    elif name == 'raw':
        instances = data.raw_Ids
    else:
        print("Error: wrong evaluate name,", name)

    right_token = 0
    whole_token = 0
    nbest_pred_results = []
    pred_scores = []
    pred_results = []
    gold_results = []
    # set model in eval mode
    model.eval()
    # len(instances); 128 for comparison against Vinyals et al. (2015)
    batch_size = 128
    start_time = time.time()
    train_num = len(instances)
    total_batch = train_num // batch_size + 1

    # Variable to collect the preds and gold prediction in multitask
    # learning
    pred_labels = {idtask: [] for idtask in range(data.HP_tasks)}
    gold_labels = {idtask: [] for idtask in range(data.HP_tasks)}

    nbest_pred_labels = {idtask: [] for idtask in range(data.HP_tasks)}
    nbest_pred_scores = {idtask: [] for idtask in range(data.HP_tasks)}

    for batch_id in range(total_batch):
        start = batch_id * batch_size
        end = (batch_id + 1) * batch_size
        if end > train_num:
            end = train_num
        instance = instances[start:end]
        if not instance:
            continue

        batch_word, batch_features, batch_wordlen, batch_wordrecover, batch_char, batch_charlen, batch_charrecover, batch_label, mask = batchify_with_label(
            instance, data.HP_gpu, inference, True)
        if nbest:
            scores, nbest_tag_seq = model.decode_nbest(
                batch_word, batch_features, batch_wordlen, batch_char,
                batch_charlen, batch_charrecover, mask, inference, nbest)
            tag_seq = []

            for idtask, task_nbest_tag_seq in enumerate(nbest_tag_seq):
                nbest_pred_result = recover_nbest_label(
                    task_nbest_tag_seq, mask, data.label_alphabet[idtask],
                    batch_wordrecover)
                nbest_pred_labels[idtask] += nbest_pred_result
                nbest_pred_scores[idtask] += scores[idtask][
                    batch_wordrecover].cpu().data.numpy().tolist()
                tag_seq.append(task_nbest_tag_seq[:, :, 0])

        else:
            tag_seq = model(batch_word,
                            batch_features,
                            batch_wordlen,
                            batch_char,
                            batch_charlen,
                            batch_charrecover,
                            mask,
                            inference=inference)

        if not inference:

            for idtask, task_tag_seq in enumerate(tag_seq):
                pred_label, gold_label = recover_label(
                    task_tag_seq,
                    batch_label[idtask],
                    mask,
                    data.label_alphabet[idtask],
                    batch_wordrecover,
                    inference=inference)
                pred_labels[idtask] += pred_label
                gold_labels[idtask] += gold_label
        else:

            if len(data.index_of_main_tasks) == data.HP_tasks:
                for idtask, task_tag_seq in enumerate(tag_seq):
                    pred_label, _ = recover_label(task_tag_seq,
                                                  None,
                                                  mask,
                                                  data.label_alphabet[idtask],
                                                  batch_wordrecover,
                                                  inference=inference)
                    pred_labels[idtask] += pred_label

            else:

                index_task = data.index_of_main_tasks[0]
                for idtask, task_tag_seq in enumerate(tag_seq):
                    pred_label, _ = recover_label(
                        task_tag_seq,
                        None,
                        mask,
                        data.label_alphabet[index_task],
                        batch_wordrecover,
                        inference=inference)
                    pred_labels[idtask] += pred_label
                    index_task += 1

    decode_time = time.time() - start_time
    speed = len(instances) / decode_time

    tasks_results = []
    range_tasks = data.HP_tasks if not inference else len(
        data.index_of_main_tasks)
    for idtask in range(range_tasks):

        if not inference:
            acc, p, r, f = get_ner_fmeasure(gold_labels[idtask],
                                            pred_labels[idtask],
                                            data.tagScheme)
        else:
            acc, p, r, f = -1, -1, -1, -1

        if nbest:
            tasks_results.append(
                (speed, acc, p, r, f, nbest_pred_labels[idtask],
                 nbest_pred_scores[idtask]))
        else:
            tasks_results.append((speed, acc, p, r, f, pred_labels[idtask],
                                  nbest_pred_scores[idtask]))
    return tasks_results
Example #24
def evaluate(domain_tag, data, model, name, nbest=None):
    if name == "train":
        if domain_tag == "Source":
            instances = data.train_Ids_S
        elif domain_tag == "Target":
            instances = data.train_Ids_T
    elif name == "dev":
        if domain_tag == "Source":
            instances = data.dev_Ids_S
        elif domain_tag == "Target":
            instances = data.dev_Ids_T
    elif name == 'test':
        if domain_tag == "Source":
            instances = data.test_Ids_S
        elif domain_tag == "Target":
            instances = data.test_Ids_T
    elif name == 'raw':
        if domain_tag == "Target" or domain_tag == "Source":
            instances = data.raw_Ids
    if domain_tag == "Source":
        label_alphabet = data.label_alphabet_S
        entity_alphabet = data.entity_alphabet_S
    elif domain_tag == "Target":
        label_alphabet = data.label_alphabet_T
        entity_alphabet = data.entity_alphabet_T

    else:
        print("Error: wrong evaluate name,", name)
        exit(1)
    nbest_pred_results = []
    pred_scores = []
    pred_results = []
    gold_results = []
    gold_entity_results = []
    pred_entity_results = []
    gold_probs_results = []
    pred_probs_results = []
    ## set model in eval mode
    model.eval()
    batch_size = data.HP_batch_size
    start_time = time.time()
    train_num = len(instances)
    total_batch = train_num // batch_size + 1
    for batch_id in range(total_batch):
        start = batch_id * batch_size
        end = (batch_id + 1) * batch_size
        if end > train_num:
            end = train_num
        instance = instances[start:end]
        if not instance:
            continue
        original_words_batch, batch_word, batch_features, batch_wordlen, batch_wordrecover, batch_char, batch_charlen, batch_charrecover, batch_label, batch_entity, mask = batchify_with_label(
            instance, data.HP_gpu, False, data.sentence_classification)
        if nbest and not data.sentence_classification:
            scores, nbest_tag_seq, entity_seq, atten_probs_seq = model.decode_nbest(
                original_words_batch, domain_tag, batch_word, batch_features,
                batch_wordlen, batch_char, batch_charlen, batch_charrecover,
                mask, nbest, batch_entity)
            nbest_pred_result = recover_nbest_label(nbest_tag_seq, mask,
                                                    label_alphabet,
                                                    batch_wordrecover)
            nbest_pred_results += nbest_pred_result
            pred_scores += scores[batch_wordrecover].cpu().data.numpy().tolist(
            )
            ## select the best sequence to evaluate
            tag_seq = nbest_tag_seq[:, :, 0]
        else:
            tag_seq, entity_seq, atten_probs_seq = model(
                original_words_batch, domain_tag, batch_word, batch_features,
                batch_wordlen, batch_char, batch_charlen, batch_charrecover,
                mask)
        # recover entity and probs results
        if entity_seq is not None:
            pred_entity, gold_entity = recover_label(
                entity_seq, batch_entity, mask, entity_alphabet,
                batch_wordrecover, data.sentence_classification)
            pred_entity_results += pred_entity
            gold_entity_results += gold_entity
        if atten_probs_seq is not None:
            pred_probs, gold_probs = recover_label(
                atten_probs_seq, batch_entity, mask, entity_alphabet,
                batch_wordrecover, data.sentence_classification)
            pred_probs_results += pred_probs
            gold_probs_results += gold_probs

        pred_label, gold_label = recover_label(tag_seq, batch_label, mask,
                                               label_alphabet,
                                               batch_wordrecover,
                                               data.sentence_classification)
        pred_results += pred_label
        gold_results += gold_label
    decode_time = time.time() - start_time
    speed = len(instances) / decode_time
    print("word acc:")
    acc, p, r, f = get_ner_fmeasure(gold_results, pred_results, data.tagScheme)
    if len(gold_entity_results) > 0:
        print("entity acc")
        entity_acc, _, _, _ = get_ner_fmeasure(gold_entity_results,
                                               pred_entity_results,
                                               "entity predict")
    if len(gold_probs_results) > 0:
        print("probs acc:")
        probs_acc, _, _, _ = get_ner_fmeasure(gold_probs_results,
                                              pred_probs_results,
                                              "probs predict")
    if nbest and not data.sentence_classification:
        return speed, acc, p, r, f, nbest_pred_results, pred_scores, pred_entity_results, pred_probs_results
    return speed, acc, p, r, f, pred_results, pred_scores
예제 #25
0
def evaluate(data, model, name):
    instances = None
    if name == "train":
        instances = data.train_Ids
    elif name == "dev":
        instances = data.dev_Ids
    elif name == 'test':
        instances = data.test_Ids
    elif name == 'raw':
        instances = data.raw_Ids
    else:
        logger.info("Error: wrong evaluate name, %s", name)
        exit(1)
    start_time = time.time()
    sent_pred_results = []
    sent_gold_results = []
    word_pred_results = []
    word_gold_results = []
    ## set model in eval mode
    model.eval()
    for idx, instance in enumerate(instances):
        if not instance or len(instance) <= 1:
            continue
        batch_word, batch_word_len, word_perm_idx, batch_word_recover, batch_label, batch_sent_type, mask, sent_mask, input_label_seq_tensor, input_sent_type_tensor = batchify_with_label(
            instance, data.HP_gpu, data)
        with torch.no_grad():
            words_tag_seq, sent_tag_seq = model.evaluate(
                None,
                batch_word,
                batch_word_len,
                batch_sent_type,
                mask,
                sent_mask,
                input_label_seq_tensor,
                input_sent_type_tensor,
                batch_word_recover,
                word_perm_idx,
                need_cat=False)
        # with codecs.open("attention_input.txt", "a", "utf-8") as w:
        #     obj = ["".join([data.word_alphabet.get_instance(w_idx - 1) if w_idx != 0 else "" for w_idx in sent]) for
        #            sent in batch_word.data.cpu().numpy().tolist()]
        #     json.dump(obj, w)
        #     w.write("\n")
        sent_pred, sent_gold, word_pred_label, word_gold_label = recover_label(
            words_tag_seq, sent_tag_seq, batch_label, batch_sent_type, mask,
            batch_word_recover, data.use_crf)
        sent_pred_results.extend(sent_pred)
        sent_gold_results.extend(sent_gold)
        word_pred_results.extend(word_pred_label)
        word_gold_results.extend(word_gold_label)
    decode_time = time.time() - start_time
    sent_f1 = f1_score(sent_gold_results, sent_pred_results, average="macro")
    sent_report = classification_report(
        sent_gold_results,
        sent_pred_results,
        target_names=data.sentence_type_alphabet.instances,
        digits=4)
    speed = len(instances) / decode_time
    word_acc = accuracy_score(word_gold_results, word_pred_results)
    word_f1 = f1_score(word_gold_results, word_pred_results, average='macro')
    word_report = classification_report(
        word_gold_results,
        word_pred_results,
        target_names=data.label_alphabet.instances,
        digits=4)
    word_ner_acc, word_ner_p, word_ner_r, word_ner_f = get_ner_fmeasure(
        word_gold_results,
        word_pred_results,
        data.label_alphabet,
        data.tagScheme,
        need_save_matrix=name == 'test')

    return speed, word_acc, word_report, word_f1, \
           word_ner_acc, word_ner_p, word_ner_r, word_ner_f, sent_f1, sent_report
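
These evaluate functions sort sentences by length before packing them for the encoder, and batch_wordrecover (or word_perm_idx / batch_word_recover) is the permutation that puts predictions back into the original order before scoring. A minimal sketch of that round trip, assuming the usual argsort-based permutation (the variable names below are illustrative, not taken from these snippets):

import torch

lengths = torch.tensor([3, 5, 2, 4])            # original sentence lengths
perm_idx = lengths.sort(descending=True)[1]     # order used inside the model
_, recover_idx = perm_idx.sort()                # permutation that undoes the sort

sorted_lengths = lengths[perm_idx]              # tensor([5, 4, 3, 2])
restored = sorted_lengths[recover_idx]          # back to tensor([3, 5, 2, 4])
assert torch.equal(restored, lengths)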
예제 #26
0
def evaluate(data, model, name, qleft=None, qright=None, batch_size=1):
    """
        input:
            name: the dataset split to evaluate on
            qleft, qright: the start and end batch indices of the validation set;
                           when the validation set is huge, these can be used to
                           evaluate only a sample of it
        output:
            speed: decoding speed (sentences per second)
            acc: accuracy
            p: precision
            r: recall
            f: f1 score
            pred_results: the predicted labels as a list of strings
            p, r, f are most useful when evaluating on an NER dataset
    """
    if name == "train":
        instances = data.train_Ids
    elif name == "dev":
        instances = data.dev_Ids
    elif name == 'test':
        instances = data.test_Ids
    elif name == 'raw':
        instances = data.raw_Ids
    else:
        print("Error: wrong evaluate name,", name)
    right_token = 0
    whole_token = 0
    pred_results = []
    gold_results = []
    ## set model in eval mode
    model.examiner.eval()
    start_time = time.time()
    train_num = len(instances)
    total_batch = train_num // batch_size + 1
    if qleft is None:
        qleft = 0
        qright = int(total_batch / 10)
    if name == "test":
        print("start test")

    for batch_id in range(qleft, qright):
        start = batch_id * batch_size
        end = (batch_id + 1) * batch_size
        if end > train_num:
            end = train_num
        instance = instances[start:end]
        if not instance:
            continue
        batch_word, batch_wordlen, batch_wordrecover, batch_char, batch_charlen, batch_charrecover, batch_label, mask = batchify_with_label(instance, data.HP_gpu, True)

        tag_seq = model.test(batch_word)

        pred_label, gold_label = recover_label(tag_seq, batch_label, mask, data.label_alphabet, batch_wordrecover)
        batch_label,_tag_seq,_tag_prob,tag_mask,score,indices,scores_ref=model.pos_selection(batch_word, batch_wordlen, batch_char, batch_charlen, batch_charrecover, batch_label, mask)

        pred_results += pred_label
        gold_results += gold_label


    decode_time = time.time() - start_time
    speed = len(instances)/decode_time

    acc, p, r, f = get_ner_fmeasure(gold_results, pred_results, data.tagScheme)
    return speed, acc, p, r, f, pred_results  
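
When qleft and qright are omitted, the loop above only scores the first tenth of the batches, which is the cheap sampling described in the docstring. A small sketch with assumed numbers:

train_num, batch_size = 1000, 1
total_batch = train_num // batch_size + 1   # 1001
qleft, qright = 0, int(total_batch / 10)    # only batches 0..99 get evaluated
assert qright - qleft == 100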
예제 #27
0
def evaluate(data, model, name, nbest=None):
    if name == "train":
        instances = data.train_Ids
    elif name == "dev":
        instances = data.dev_Ids
    elif name == 'test':
        instances = data.test_Ids
    elif name == 'raw':
        instances = data.raw_Ids
        # print(data.__dict__)
        # print('instances[1]:', instances[1])  # [word,features,char,label]
    else:
        print("Error: wrong evaluate name,", name)
        exit(1)
    # print('data.__dict__:', data.__dict__)
    right_token = 0
    whole_token = 0
    nbest_pred_results = []
    pred_scores = []
    pred_results = []
    gold_results = []
    # set model in eval mode
    model.eval()
    batch_size = data.HP_batch_size  # 10
    # print(batch_size)
    start_time = time.time()
    train_num = len(instances)  # 112
    total_batch = train_num // batch_size + 1  # number of batches needed to cover the whole raw set (not an epoch count)
    # print(total_batch)
    for batch_id in range(total_batch):  # every batch_size (here 10) instances form one batch to predict
        start = batch_id * batch_size
        end = (batch_id + 1) * batch_size
        if end > train_num:
            end = train_num  # e.g. if 120 > 112, then end = 112
        instance = instances[start:end]
        # the processed prediction data looks like instance[0]; all labels in raw.bmes are set to O
        # print('instance:', len(instance), instance[0])
        # for i in instance:
        #     print(len(i[0]), i[0])
        if not instance:
            continue
        # zero padding for word and char, using the max_seq_length within the batch
        # batchify_with_label: requires actual labels
        # prediction:
        batch_word, batch_features, batch_wordlen, batch_wordrecover, batch_char, \
         batch_charlen, batch_charrecover, batch_label, mask = batchify_with_label(
          instance, data.HP_gpu, False, data.sentence_classification)
        if nbest and not data.sentence_classification:
            # the inputs for prediction are as follows:
            scores, nbest_tag_seq = model.decode_nbest(
                batch_word, batch_features, batch_wordlen, batch_char,
                batch_charlen, batch_charrecover, mask, nbest)
            # print('scores:', scores)
            # print('nbest_tag_seq:', nbest_tag_seq.shape, nbest_tag_seq)  # predicted labels for every word in every sentence; nbest = shape[-1]
            # recover_nbest_label: restores the order to match the input and outputs the nbest predictions
            nbest_pred_result = recover_nbest_label(nbest_tag_seq, mask,
                                                    data.label_alphabet,
                                                    batch_wordrecover)
            nbest_pred_results += nbest_pred_result
            pred_scores += scores[batch_wordrecover].cpu().data.numpy().tolist(
            )  # reorder pred_scores to match the input order
            # select the best sequence to evaluate
            tag_seq = nbest_tag_seq[:, :, 0]  # keep only the first (best) column of the nbest results
            # print('tag_seq:', tag_seq)  # the final predicted tag sequence
        else:
            tag_seq = model(batch_word, batch_features, batch_wordlen,
                            batch_char, batch_charlen, batch_charrecover, mask)
        # print("tag:",tag_seq)
        # recover_label: recovers the predicted labels from tag_seq and the gold labels from batch_label
        # batch_label: the gold label values of the batch
        pred_label, gold_label = recover_label(tag_seq, batch_label, mask,
                                               data.label_alphabet,
                                               batch_wordrecover,
                                               data.sentence_classification)
        # print('pred_label:', pred_label)  # predicted labels
        # print('gold_label:', gold_label)  # gold labels
        pred_results += pred_label
        gold_results += gold_label
        # print('pred_results:', len(pred_results))
        # print('gold_results:', len(gold_results))
    print(name + ' ' + 'pred_results: ', len(pred_results))
    print(name + ' ' + 'gold_results:', len(gold_results))
    decode_time = time.time() - start_time
    # print('decode_time:', decode_time)
    speed = len(instances) / decode_time  # sentences processed per second
    acc, p, r, f = get_ner_fmeasure(gold_results, pred_results, data.tagScheme)
    if nbest and not data.sentence_classification:
        return speed, acc, p, r, f, nbest_pred_results, pred_scores
    return speed, acc, p, r, f, pred_results, pred_scores
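
decode_nbest appears to return nbest_tag_seq shaped (batch, seq_len, nbest) with the paths ordered best first, so slicing the last axis at index 0 keeps the 1-best path, which is then scored like an ordinary Viterbi output. A tiny sketch of that slicing (the shapes are assumptions):

import numpy as np

batch, seq_len, nbest = 2, 4, 3
nbest_tag_seq = np.random.randint(0, 5, size=(batch, seq_len, nbest))
tag_seq = nbest_tag_seq[:, :, 0]   # (batch, seq_len): the top-scoring path only
assert tag_seq.shape == (batch, seq_len)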
예제 #28
0
    def evaluate_batch(self, eva_data):
        wl = self.args.vocab.wl
        cl = self.args.vocab.cl

        batch_size = self.args.batch_size
        ## set model in eval mode
        self.model.eval()
        correct_preds = 0.
        total_preds = 0.
        total_correct = 0.
        accs = []
        pred_results = []
        gold_results = []
        for i, (words, label_ids) in enumerate(
                self.args.vocab.minibatches(eva_data, batch_size=batch_size)):
            char_ids, word_ids = zip(*words)
            word_ids, sequence_lengths = seqPAD.pad_sequences(word_ids,
                                                              pad_tok=0,
                                                              wthres=wl,
                                                              cthres=cl)
            char_ids, word_lengths = seqPAD.pad_sequences(char_ids,
                                                          pad_tok=0,
                                                          nlevels=2,
                                                          wthres=wl,
                                                          cthres=cl)
            label_ids, _ = seqPAD.pad_sequences(label_ids,
                                                pad_tok=0,
                                                wthres=wl,
                                                cthres=cl)

            data_tensors = Data2tensor.sort_tensors(label_ids,
                                                    word_ids,
                                                    sequence_lengths,
                                                    char_ids,
                                                    word_lengths,
                                                    volatile_flag=True)
            label_tensor, word_tensor, sequence_lengths, word_seq_recover, char_tensor, word_lengths, char_seq_recover = data_tensors
            mask_tensor = word_tensor > 0

            label_score = self.model(word_tensor, sequence_lengths,
                                     char_tensor, word_lengths,
                                     char_seq_recover)

            label_prob, label_pred = self.model.inference(
                label_score, mask_tensor)

            pred_label, gold_label = recover_label(label_pred, label_tensor,
                                                   mask_tensor,
                                                   self.args.vocab.l2i,
                                                   word_seq_recover)
            pred_results += pred_label
            gold_results += gold_label
        acc, p, r, f = get_ner_fmeasure(gold_results, pred_results)

        #            label_pred = label_pred.cpu().data.numpy()
        #            label_tensor = label_tensor.cpu().data.numpy()
        #            sequence_lengths = sequence_lengths.cpu().data.numpy()
        #
        #            for lab, lab_pred, length in zip(label_tensor, label_pred, sequence_lengths):
        #                lab      = lab[:length]
        #                lab_pred = lab_pred[:length]
        #                accs    += [a==b for (a, b) in zip(lab, lab_pred)]
        #
        #                lab_chunks      = set(NERchunks.get_chunks(lab, self.args.vocab.l2i))
        #                lab_pred_chunks = set(NERchunks.get_chunks(lab_pred, self.args.vocab.l2i))
        #
        #                correct_preds += len(lab_chunks & lab_pred_chunks)
        #                total_preds   += len(lab_pred_chunks)
        #                total_correct += len(lab_chunks)
        #
        #        p   = correct_preds / total_preds if correct_preds > 0 else 0
        #        r   = correct_preds / total_correct if correct_preds > 0 else 0
        #        f  = 2 * p * r / (p + r) if correct_preds > 0 else 0
        #        acc = np.mean(accs)

        return acc, f
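
The commented-out block above scores entity chunks (spans) rather than individual tokens. A minimal, runnable sketch of that idea on BIO label strings; get_bio_chunks is a simplified stand-in for the repo's NERchunks helper, not the original code:

def get_bio_chunks(labels):
    """Return a set of (entity_type, start, end_exclusive) spans from BIO tags."""
    chunks, start, etype = set(), None, None
    for i, lab in enumerate(labels + ["O"]):  # sentinel "O" flushes the last chunk
        if lab.startswith("B-") or lab == "O" or (lab.startswith("I-") and etype != lab[2:]):
            if etype is not None:
                chunks.add((etype, start, i))
                start, etype = None, None
            if lab.startswith("B-"):
                start, etype = i, lab[2:]
    return chunks

gold = ["B-PER", "I-PER", "O", "B-LOC"]
pred = ["B-PER", "I-PER", "O", "O"]
gold_chunks, pred_chunks = get_bio_chunks(gold), get_bio_chunks(pred)
correct = len(gold_chunks & pred_chunks)
p = correct / len(pred_chunks) if pred_chunks else 0.0
r = correct / len(gold_chunks) if gold_chunks else 0.0
f = 2 * p * r / (p + r) if p + r > 0 else 0.0  # here: p = 1.0, r = 0.5, f ~ 0.667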
예제 #29
0
def evaluate(data, model, name):
    if name == "train":
        instances = data.train_Ids
    elif name == "dev":
        instances = data.dev_Ids
    elif name == 'test':
        instances = data.test_Ids
    elif name == 'raw':
        instances = data.raw_Ids
    else:
        print("Error: wrong evaluate name,", name)
        exit(1)

    gold_results = []
    pred_results = []

    batch_size = data.batch_size
    start_time = time.time()
    train_num = len(instances)  # number of documents
    total_batch = train_num // batch_size + 1

    # set model to evaluation mode
    model.eval()

    with torch.no_grad():  # disable gradient tracking during evaluation
        for batch_id in range(total_batch):
            # take instances[start:end]
            start = batch_id * batch_size
            end = (batch_id + 1) * batch_size
            if end > train_num:
                end = train_num
            instance = instances[start:end]
            if not instance:
                continue

            # batchify the data in instances[start:end]
            batch_word, batch_wordlen, batch_wordrecover, \
            batch_char, batch_charlen, batch_charrecover, batch_label, mask, doc_idx, word_idx \
                = batchify_with_label(instance, data.use_gpu, False)

            mask = mask.eq(1)  # convert to a boolean (True/False) mask
            # run several stochastic forward passes and average the results
            p, lstm_out, outs, word_represent = model.MC_sampling(
                batch_word, batch_wordlen, batch_char, batch_charlen,
                batch_charrecover, data.nsamples)

            # get the label-alphabet index of each token's predicted label
            model1_preds = decode_seq(outs, mask)
            # get the uncertainty associated with each token
            uncertainty = epistemic_uncertainty(p, mask)

            # total number of sentences × [O O O O]
            pred_labels, gold_label = recover_label(model1_preds, batch_label,
                                                    mask, data.label_alphabet,
                                                    batch_wordrecover)
            gold_results += gold_label
            pred_results += pred_labels

    decode_time = time.time() - start_time
    speed = train_num / decode_time

    # compute the evaluation metrics
    acc, p, r, f = get_ner_fmeasure(gold_results, pred_results, data.tagScheme)
    score = f
    print(
        "%s: time: %.2f s, speed: %.2f doc/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f; \n"
        % (name, decode_time, speed, acc, p, r, f))

    # save the predicted results
    if name == 'raw':
        print("save predicted results to %s" % data.decode_dir)
        data.convert_doc_to_sent(name)
        data.write_decoded_results(pred_results, name)

    return score, pred_results
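
MC_sampling above averages several stochastic forward passes (MC dropout), and epistemic_uncertainty turns the disagreement between those passes into a per-token score. The exact definition used in the source is not shown; the sketch below assumes the predictive entropy of the MC-averaged class distribution:

import numpy as np

def predictive_entropy(probs):
    """probs: (nsamples, seq_len, num_classes) softmax outputs from MC dropout."""
    mean_p = probs.mean(axis=0)                              # (seq_len, num_classes)
    return -(mean_p * np.log(mean_p + 1e-12)).sum(axis=-1)   # (seq_len,) entropy per token

rng = np.random.default_rng(0)
samples = rng.dirichlet(np.ones(5), size=(8, 10))            # 8 passes, 10 tokens, 5 classes
print(predictive_entropy(samples))                           # higher value = less certain token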
예제 #30
0
def evaluate(data, model, name, nbest=None):
    if name == "train":
        instances = data.train_Ids
    elif name == "dev":
        instances = data.dev_Ids
    elif name == 'test':
        instances = data.test_Ids
    elif name == 'raw':
        instances = data.raw_Ids
    else:
        print "Error: wrong evaluate name,", name

    show_nbest = False

    right_token = 0
    whole_token = 0
    nbest_pred_results = []
    pred_scores = []

    pred_results = []  # total pred result
    gold_results = []  # total gold result

    # set model in eval mode
    model.eval()
    batch_size = data.batch_size
    start_time = time.time()
    train_num = len(instances)
    total_batch = train_num // batch_size + 1

    for batch_id in range(total_batch):
        start = batch_id * batch_size
        end = (batch_id + 1) * batch_size
        if end > train_num:
            end = train_num
        instance = instances[start:end]
        if not instance:
            continue
        batch_word, batch_features, batch_wordlen, batch_wordrecover, batch_char, batch_charlen, batch_charrecover, batch_label, batch_trans, trans_seq_lengths, trans_seq_recover, mask = batchify_with_label(
            instance, data.gpu, False)

        if nbest and nbest >= 2:
            scores, nbest_tag_seq = model.decode_nbest(
                batch_word, batch_features, batch_wordlen, batch_char,
                batch_charlen, batch_charrecover, mask, nbest, batch_trans,
                trans_seq_lengths, trans_seq_recover)
            nbest_pred_result = recover_nbest_label(nbest_tag_seq, mask,
                                                    data.label_alphabet,
                                                    batch_wordrecover)
            nbest_pred_results += nbest_pred_result
            pred_scores += scores[batch_wordrecover].cpu().data.numpy().tolist(
            )
            ## select the best sequence to evaluate
            tag_seq = nbest_tag_seq[:, :, 0]

        else:
            tag_seq = model(batch_word, batch_features, batch_wordlen,
                            batch_char, batch_charlen, batch_charrecover, mask,
                            batch_trans, trans_seq_lengths, trans_seq_recover)
        # print "tag:", tag_seq

        pred_label, gold_label = recover_label(tag_seq, batch_label, mask,
                                               data.label_alphabet,
                                               batch_wordrecover)
        pred_results += pred_label
        gold_results += gold_label

    # show nbest out
    instance_count = len(pred_scores)
    if show_nbest:
        for i in range(10):
            x = random.randint(0, instance_count - 1)
            print('---' * 10)
            print('gold:       ' + ','.join(gold_results[x]))
            for j in range(nbest):
                print('%.8f: ' % (pred_scores[x][j]) + ','.join(
                    nbest_pred_results[x][j]))

    decode_time = time.time() - start_time
    speed = len(instances) / decode_time
    acc, p, r, f = get_ner_fmeasure(gold_results, pred_results, data.tagScheme)
    if nbest:
        return speed, acc, p, r, f, nbest_pred_results, pred_scores
    return speed, acc, p, r, f, pred_results, pred_scores
예제 #31
0
def evaluate(data, model, logger, name, best_dev=-1):
    if name == "train":
        instances = data.train_Ids
    elif name == "dev":
        instances = data.dev_Ids
    elif name == 'test':
        instances = data.test_Ids
    else:
        print("Error: wrong evaluate name,", name)
        exit(1)
    H2BH_pred_results = []
    H2BB_pred_results = []
    B2HH_pred_results = []
    B2HB_pred_results = []
    hgold_results = []
    lgold_results = []
    ## set model in eval mode
    model.eval()
    batch_size = model.batch_size
    train_num = len(instances)
    total_batch = train_num // batch_size + 1
    for batch_id in range(total_batch):
        start = batch_id * batch_size
        end = (batch_id + 1) * batch_size
        if end > train_num:
            end = train_num
        instance = instances[start:end]
        if not instance:
            continue
        batch_word, batch_wordlen, batch_wordrecover, batch_char, batch_charlen, batch_charrecover, batch_hlabel,batch_llabel, mask  = batchify_sequence_labeling_with_label(instance, args.gpu,args.max_sent_length, False)
        if args.model == "DUAL":
            H2BH_tag_seqs, H2BB_tag_seqs, B2HB_tag_seqs, B2HH_tag_seqs = model(batch_word, batch_wordlen, batch_char, batch_charlen, batch_charrecover, mask)

            H2BHpred_label, H2BBpred_label, hgold_label, lgold_label = recover_label(H2BH_tag_seqs, H2BB_tag_seqs,batch_hlabel, batch_llabel, mask,
                                                                                     data.hlabelset, data.llabelset,batch_wordrecover)
            B2HHpred_label, B2HBpred_label, _, _ = recover_label(B2HH_tag_seqs, B2HB_tag_seqs,batch_hlabel, batch_llabel, mask,
                                                                 data.hlabelset, data.llabelset,batch_wordrecover)
            H2BH_pred_results += H2BHpred_label
            H2BB_pred_results += H2BBpred_label
            B2HH_pred_results += B2HHpred_label
            B2HB_pred_results += B2HBpred_label
            hgold_results += hgold_label
            lgold_results += lgold_label

        elif args.model == "H2B":
            H2BH_tag_seqs, H2BB_tag_seqs = model(batch_word, batch_wordlen, batch_char, batch_charlen, batch_charrecover, mask)
            hpred_label, lpred_label, hgold_label, lgold_label = recover_label(H2BH_tag_seqs, H2BB_tag_seqs,batch_hlabel, batch_llabel, mask,
                                                                               data.hlabelset, data.llabelset,batch_wordrecover)
            H2BH_pred_results += hpred_label
            H2BB_pred_results += lpred_label
            hgold_results += hgold_label
            lgold_results += lgold_label

        elif args.model == "B2H":
            B2HB_tag_seqs, B2HH_tag_seqs = model(batch_word, batch_wordlen, batch_char, batch_charlen,batch_charrecover, mask)
            hpred_label, lpred_label, hgold_label, lgold_label = recover_label(B2HH_tag_seqs, B2HB_tag_seqs,
                                                                               batch_hlabel, batch_llabel, mask,
                                                                               data.hlabelset, data.llabelset,
                                                                               batch_wordrecover)
            B2HH_pred_results += hpred_label
            B2HB_pred_results += lpred_label
            hgold_results += hgold_label
            lgold_results += lgold_label

    if args.model == "DUAL":
        H2BH_evals, H2BB_evals, H2B_evals = get_ner_fmeasure(hgold_results, lgold_results, H2BH_pred_results,H2BB_pred_results)
        B2HH_evals, B2HB_evals, B2H_evals = get_ner_fmeasure(hgold_results, lgold_results, B2HH_pred_results,B2HB_pred_results)

    elif args.model == "H2B":
        H2BH_evals, H2BB_evals, H2B_evals = get_ner_fmeasure(hgold_results, lgold_results, H2BH_pred_results,H2BB_pred_results, )
        B2HH_evals, B2HB_evals, B2H_evals = [0, 0, 0, 0], [0, 0, 0], [0, 0, 0]

    elif args.model == "B2H":
        H2BH_evals, H2BB_evals, H2B_evals = [0, 0, 0, 0], [0, 0, 0], [0, 0, 0]
        B2HH_evals, B2HB_evals, B2H_evals = get_ner_fmeasure(hgold_results, lgold_results, B2HH_pred_results,B2HB_pred_results, )

    H2B_results = [H2BH_pred_results, H2BB_pred_results]
    B2H_results = [B2HH_pred_results, B2HB_pred_results]

    logger.info(
        "%s --HIGH layer: H2B MODEL  acc:%.4f , p: %.4f, r: %.4f, f: %.4f ||||| B2H MODEL acc:%.4f , p: %.4f, r: %.4f, f: %.4f ." %
        (name.upper(),H2BH_evals[0], H2BH_evals[1], H2BH_evals[2],H2BH_evals[3], B2HH_evals[0], B2HH_evals[1], B2HH_evals[2], B2HH_evals[3]))

    logger.info(
        "%s --BOT layer: H2B MODEL  p: %.4f, r: %.4f, f: %.4f ||||| B2H MODEL  p: %.4f, r: %.4f, f: %.4f ." %
        (name.upper(),H2BB_evals[0], H2BB_evals[1], H2BB_evals[2], B2HB_evals[0], B2HB_evals[1], B2HB_evals[2]))

    logger.info(
        "%s --ALL layer: H2B MODEL  p: %.4f, r: %.4f, f: %.4f ||||| B2H MODEL  p: %.4f, r: %.4f, f: %.4f .best_f: %.4f" %
        (name.upper(),H2B_evals[0], H2B_evals[1], H2B_evals[2], B2H_evals[0], B2H_evals[1], B2H_evals[2], best_dev))

    print(
        "%s --ALL layer: H2B MODEL  p: %.4f, r: %.4f, f: %.4f ||||| B2H MODEL  p: %.4f, r: %.4f, f: %.4f .best_f: %.4f" %
        (name.upper(),H2B_evals[0], H2B_evals[1], H2B_evals[2], B2H_evals[0], B2H_evals[1], B2H_evals[2], best_dev))

    return H2B_evals,B2H_evals, H2B_results,B2H_results
예제 #32
0
# -*- coding: utf-8 -*-