import numpy as np

# The evaluation helpers (evaluation_NER, evaluation_NER_BIOES, evaluation_NER_Type)
# and the test data generator used below are assumed to be defined elsewhere in the project.


def test_model_global(nn_model,
                      testdata,
                      chardata,
                      pos_data,
                      index2word,
                      resultfile='',
                      batch_size=50):
    index2word[0] = ''  # padding index decodes to the empty string
    index2word_BIOES = {0: '', 1: 'B', 2: 'I', 3: 'O', 4: 'E', 5: 'S'}
    index2word_Type = {0: '', 1: 'O', 2: 'LOC', 3: 'ORG', 4: 'PER', 5: 'MISC'}

    testx = np.asarray(testdata[0], dtype="int32")
    testy = np.asarray(testdata[1], dtype="int32")
    testy_BIOES = np.asarray(testdata[3], dtype="int32")
    testy_Type = np.asarray(testdata[4], dtype="int32")
    poslabel_test = np.asarray(pos_data, dtype="int32")
    testchar = np.asarray(chardata, dtype="int32")

    testresult3 = []
    # Two-headed model: predictions[0] holds the BIOES (boundary) scores,
    # predictions[1] holds the entity-type scores, one softmax per token.
    predictions = nn_model.predict([testx, testchar])

    for si in range(0, len(predictions[0])):

        ptag = []
        for iw, word in enumerate(predictions[0][si]):
            next_index = np.argmax(word)
            next_token = index2word_BIOES[next_index]

            next_index2 = np.argmax(predictions[1][si][iw])
            next_token2 = index2word_Type[next_index2]

            # Join the two heads into one tag, e.g. 'B' + 'PER' -> 'B-PER';
            # 'O' and padding ('') carry no type and are kept as-is.
            if next_token == 'O' or next_token == '':
                ptag.append(next_token)
            else:
                ptag.append(next_token + '-' + next_token2)

        # Gold tags are decoded with the full index2word inventory.
        ttag = []
        for word in testy[si]:
            next_index = np.argmax(word)
            next_token = index2word[next_index]
            ttag.append(next_token)

        testresult3.append([ptag, ttag])

    P, R, F, PR_count, P_count, TR_count = evaluation_NER(
        testresult3, resultfile=resultfile)

    print('global---Type>>>>>>>>>>', P, R, F)

    return P, R, F, PR_count, P_count, TR_count
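# --- Illustration (not from the original source): a tiny, self-contained demo
# --- of how the two prediction heads above are merged into full tags. The
# --- score arrays are made-up toy values; `np` is the numpy import at the top.
def _demo_join_heads():
    index2word_BIOES = {0: '', 1: 'B', 2: 'I', 3: 'O', 4: 'E', 5: 'S'}
    index2word_Type = {0: '', 1: 'O', 2: 'LOC', 3: 'ORG', 4: 'PER', 5: 'MISC'}
    # One sentence, three tokens: toy per-token softmax outputs for each head.
    bioes_scores = np.array([[0.1, 0.7, 0.1, 0.05, 0.03, 0.02],   # -> 'B'
                             [0.1, 0.1, 0.1, 0.05, 0.63, 0.02],   # -> 'E'
                             [0.1, 0.1, 0.1, 0.65, 0.03, 0.02]])  # -> 'O'
    type_scores = np.array([[0.1, 0.1, 0.1, 0.1, 0.5, 0.1],       # -> 'PER'
                            [0.1, 0.1, 0.1, 0.1, 0.5, 0.1],       # -> 'PER'
                            [0.1, 0.6, 0.1, 0.1, 0.05, 0.05]])    # -> 'O'
    tags = []
    for b, t in zip(bioes_scores, type_scores):
        boundary = index2word_BIOES[int(np.argmax(b))]
        etype = index2word_Type[int(np.argmax(t))]
        tags.append(boundary if boundary in ('O', '') else boundary + '-' + etype)
    return tags  # ['B-PER', 'E-PER', 'O']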
Example #2
def test_model(nn_model,
               inputs_test_x,
               test_y,
               index2word,
               resultfile='',
               batch_size=10):
    index2word[0] = ''

    predictions = nn_model.predict(inputs_test_x)
    testresult = []
    for si in range(0, len(predictions)):
        # Predicted tags: argmax over each token's score vector.
        sent = predictions[si]
        ptag = []
        for word in sent:
            next_index = np.argmax(word)
            next_token = index2word[next_index]
            ptag.append(next_token)

        # Gold tags for the same sentence.
        senty = test_y[0][si]
        ttag = []
        for word in senty:
            next_index = np.argmax(word)
            next_token = index2word[next_index]
            ttag.append(next_token)

        testresult.append([ptag, ttag])

    P, R, F, PR_count, P_count, TR_count = evaluation_NER(testresult)

    return P, R, F, PR_count, P_count, TR_count
Example #3
def test_model(nn_model,
               testdata,
               chardata,
               pos_data,
               index2word,
               resultfile='',
               batch_size=50):
    index2word[0] = ''

    testx = np.asarray(testdata[0], dtype="int32")
    testy = np.asarray(testdata[1], dtype="int32")
    testpos = np.asarray(pos_data, dtype="int32")
    testchar = np.asarray(chardata, dtype="int32")

    testresult = []

    predictions = nn_model.predict([testx, testchar])

    for si in range(0, len(predictions)):

        # Predicted tags: argmax over each token's score vector.
        sent = predictions[si]
        ptag = []
        for word in sent:
            next_index = np.argmax(word)
            next_token = index2word[next_index]
            ptag.append(next_token)

        senty = testy[si]
        ttag = []
        for word in senty:
            next_index = np.argmax(word)
            next_token = index2word[next_index]
            ttag.append(next_token)

        testresult.append([ptag, ttag])

    P, R, F, PR_count, P_count, TR_count = evaluation_NER(
        testresult, resultfile)

    return P, R, F, PR_count, P_count, TR_count
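# --- Refactoring sketch (not from the original source): the argmax-and-lookup
# --- loop above recurs in every test_model variant and could be factored into
# --- a small helper like this one; `np` is the numpy import at the top.
def decode_tags(score_matrix, index2word):
    """Map per-token score vectors of shape (maxlen, n_tags) to tag strings."""
    return [index2word[int(np.argmax(scores))] for scores in score_matrix]

# Toy usage: decode_tags(np.array([[0.1, 0.8, 0.1], [0.7, 0.2, 0.1]]),
#                        {0: '', 1: 'B-PER', 2: 'O'}) returns ['B-PER', ''].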
Example #4
def test_model(nn_model,
               inputs_test_x,
               test_y,
               index2word,
               resultfile='',
               batch_size=10,
               testfile=''):

    index2word[0] = ''

    predictions = nn_model.predict(inputs_test_x)
    testresult = []

    for si in range(0, len(predictions)):
        # Predicted tags over the full (padded) sentence length.
        sent = predictions[si]
        ptag = []
        for word in sent:
            next_index = np.argmax(word)
            next_token = index2word[next_index]
            ptag.append(next_token)

        senty = test_y[0][si]
        ttag = []

        # Gold tags stop at the first padding index (0); the predicted tags
        # above keep the padded positions, which decode to the empty string.
        for word in senty:
            next_index = np.argmax(word)
            if next_index == 0:
                break
            next_token = index2word[next_index]
            ttag.append(next_token)

        testresult.append([ptag, ttag])

    P, R, F, PR_count, P_count, TR_count = evaluation_NER(testresult)

    return P, R, F, PR_count, P_count, TR_count
Example #5
def test_model(nn_model, index2word, batch_size=10):

    # `test`, `test_posi`, `test_label`, `max_s` and `data_generator_4test`
    # are assumed to be module-level globals prepared elsewhere in this script.
    test_D = data_generator_4test(test,
                                  test_posi,
                                  batch_size=batch_size,
                                  maxlen=max_s)

    predictions = nn_model.predict_generator(generator=test_D.__iter__(),
                                             steps=len(test_D),
                                             verbose=1)
    print(len(test), len(predictions))

    testresult = []
    for si in range(0, len(predictions)):
        sent = predictions[si]

        ptag = []
        senty = test_label[si]
        ttag = []

        # Decode gold and predicted tags position by position.
        for wi, word in enumerate(senty):
            next_index = np.argmax(word)

            next_token = index2word[next_index]
            ttag.append(next_token)

            next_index = np.argmax(sent[wi])

            next_token = index2word[next_index]
            ptag.append(next_token)

        testresult.append([ptag, ttag])

    P, R, F, PR_count, P_count, TR_count = evaluation_NER(testresult)

    return P, R, F, PR_count, P_count, TR_count
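# --- Interface sketch (an assumption, not the project's actual implementation):
# --- judging from the call above, data_generator_4test only needs to expose
# --- __len__ (number of predict_generator steps) and __iter__ (yielding batches
# --- of model inputs). A minimal stand-in could look like this; it assumes the
# --- word-id and position-id arrays are already padded to maxlen upstream.
class MinimalTestGenerator(object):
    def __init__(self, word_ids, posi_ids, batch_size=10, maxlen=50):
        self.word_ids = np.asarray(word_ids, dtype='int32')
        self.posi_ids = np.asarray(posi_ids, dtype='int32')
        self.batch_size = batch_size
        self.maxlen = maxlen  # kept for interface parity; padding assumed done
        self.steps = int(np.ceil(len(self.word_ids) / float(batch_size)))

    def __len__(self):
        # Number of steps predict_generator should draw from the iterator.
        return self.steps

    def __iter__(self):
        # Yield inputs only (no labels); predict_generator does not use targets.
        for start in range(0, len(self.word_ids), self.batch_size):
            end = start + self.batch_size
            yield [self.word_ids[start:end], self.posi_ids[start:end]]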
Example #6
def test_model_segment(nn_model, testdata, chartest, index2tag):
    # First-step (segmentation) test: evaluates the predicted BIOES boundary
    # tags and returns them, with padding stripped, for later use.
    index2tag[0] = ''

    testx = np.asarray(testdata[0], dtype="int32")
    testy_BIOES = np.asarray(testdata[1], dtype="int32")
    testchar = np.asarray(chartest, dtype="int32")

    predictions = nn_model.predict([testx, testchar])
    testresult_1Step = []
    testresult2 = []
    for si in range(0, len(predictions)):

        ptag_BIOES = []
        ptag_1Step = []
        for word in predictions[si]:

            next_index = np.argmax(word)
            next_token = index2tag[next_index]
            ptag_BIOES.append(next_token)
            if next_token != '':
                # Keep only non-padding predictions for the returned tags.
                ptag_1Step.append(next_token)

        ttag_BIOES = []
        for word in testy_BIOES[si]:

            next_index = np.argmax(word)
            next_token = index2tag[next_index]
            ttag_BIOES.append(next_token)

        testresult2.append([ptag_BIOES, ttag_BIOES])
        testresult_1Step.append(ptag_1Step)

    P, R, F, PR_count, P_count, TR_count = evaluation_NER(testresult2,
                                                          resultfile='')
    print('NER test results  >>>>>>>>>>', P, R, F, PR_count, P_count, TR_count)

    return testresult_1Step
def test_model_divide(nn_model,
                      testdata,
                      chardata,
                      pos_data,
                      index2word,
                      resultfile='',
                      batch_size=50):
    index2word[0] = ''
    index2word_BIOES = {0: '', 1: 'B', 2: 'I', 3: 'O', 4: 'E', 5: 'S'}
    index2word_Type = {0: '', 1: 'O', 2: 'LOC', 3: 'ORG', 4: 'PER', 5: 'MISC'}

    testx = np.asarray(testdata[0], dtype="int32")
    testy = np.asarray(testdata[1], dtype="int32")
    testy_BIOES = np.asarray(testdata[3], dtype="int32")
    testy_Type = np.asarray(testdata[4], dtype="int32")
    poslabel_test = np.asarray(pos_data, dtype="int32")
    testchar = np.asarray(chardata, dtype="int32")

    testresult2 = []
    testresult3 = []
    # Two-headed model: predictions[0] is the BIOES (boundary) head,
    # predictions[1] is the entity-type head.
    predictions = nn_model.predict([testx, testchar])

    # Set to True when the type head turns out to use the full combined label
    # set (index2word) rather than the 6-entry type inventory.
    isfinall = False

    for si in range(0, len(predictions[0])):

        ptag_BIOES = []
        for word in predictions[0][si]:
            next_index = np.argmax(word)
            next_token = index2word_BIOES[next_index]
            ptag_BIOES.append(next_token)

        ptag_Type = []
        for word in predictions[1][si]:
            next_index = np.argmax(word)
            if len(word) == 6:
                # 6-way softmax: decode with the type-only inventory.
                next_token = index2word_Type[next_index]
            else:
                # Larger softmax: the head predicts full combined labels.
                isfinall = True
                next_token = index2word[next_index]
            ptag_Type.append(next_token)

        ttag_BIOES = []
        for word in testy_BIOES[si]:
            next_index = np.argmax(word)
            next_token = index2word_BIOES[next_index]
            ttag_BIOES.append(next_token)

        ttag_Type = []
        # Pick the gold labels matching whatever inventory the type head used.
        if isfinall:
            for word in testy[si]:
                next_index = np.argmax(word)
                next_token = index2word[next_index]
                ttag_Type.append(next_token)
        else:
            for word in testy_Type[si]:
                next_index = np.argmax(word)
                next_token = index2word_Type[next_index]
                ttag_Type.append(next_token)

        testresult2.append([ptag_BIOES, ttag_BIOES])
        testresult3.append([ptag_Type, ttag_Type])

    P, R, F, PR_count, P_count, TR_count = evaluation_NER_BIOES(
        testresult2, resultfile=resultfile + '.BIOES.txt')
    print('divide---BIOES>>>>>>>>>>', P, R, F)
    if isfinall:
        P, R, F, PR_count, P_count, TR_count = evaluation_NER(
            testresult3, resultfile=resultfile)
    else:
        P, R, F, PR_count, P_count, TR_count = evaluation_NER_Type(
            testresult3, resultfile=resultfile + '.Type.txt')
    print('divide---Type>>>>>>>>>>', P, R, F)

    return P, R, F, PR_count, P_count, TR_count