Exemplo n.º 1
0
def getCandidateSet(EtoV_sent, VtoE_sent, sent_index):
    '''
    Build the candidate set of one sentence pair.

    Entity entries are fetched from both models (EtoV first, then VtoE)
    and split into an English-side and a Vietnamese-side list. Every
    English entry is then combined with every Vietnamese entry, and the
    combined list is passed through utilities.make_unique.

    EtoV_sent, VtoE_sent: mappings providing the 'Source' and 'Target' keys
    sent_index: forwarded unchanged to getEntSetFromFile of both models
    '''
    en_to_vi = EtoV_model.getEntSetFromFile(
        EtoV_sent['Source'], EtoV_sent['Target'], sent_index)
    vi_to_en = VtoE_model.getEntSetFromFile(
        VtoE_sent['Source'], VtoE_sent['Target'], sent_index)

    english_side = []
    vietnamese_side = []
    # For each direction, entry fields 0/3 go to the source-language list
    # and fields 1/4 to the target-language list; field 2 is copied to both.
    # NOTE(review): the meaning of the individual fields is not visible
    # here - confirm against getEntSetFromFile.
    directions = (
        (en_to_vi, english_side, vietnamese_side),
        (vi_to_en, vietnamese_side, english_side),
    )
    for entity_set, source_list, target_list in directions:
        for entry in entity_set:
            target_list.append((entry[1], entry[2], entry[4]))
            source_list.append((entry[0], entry[2], entry[3]))

    # Cross product: English entries vary slowest, Vietnamese entries fastest.
    candidates = [
        (en[0], vi[0], en[1], en[2], vi[2], vi[1])
        for en in english_side
        for vi in vietnamese_side
    ]
    return utilities.make_unique(candidates)
Exemplo n.º 2
0
def getNEPair(EtoV_sent,
              VtoE_sent,
              list_lambda,
              sent_index,
              mode,
              train_mode_InSens=False,
              train_mode_Sens=False):
    '''
    Produce the NE pair list of one sentence pair.

    Input: EtoV_sent, VtoE_sent, list_lambda, sent_index, mode and the two
           training flags
    Output: result of getFinalRes.getFinalNEPair, passed through
            utilities.make_unique

    When either training flag is set, the candidate set is loaded with
    getCandidateSetFromFile(sent_index, mode); otherwise it is built from
    the two sentences with getCandidateSet.
    '''
    load_stored_candidates = train_mode_InSens or train_mode_Sens
    if not load_stored_candidates:
        candidates = getCandidateSet.getCandidateSet(
            EtoV_sent, VtoE_sent, sent_index)
    else:
        candidates = getCandidateSet.getCandidateSetFromFile(
            sent_index, mode)

    # The type-sensitive score is computed first, then the type-insensitive
    # one; only the type-sensitive scorer receives `mode`.
    type_sens_score = getCombineScore_TypeSens.getCombineScore(
        candidates, EtoV_sent, VtoE_sent, list_lambda, sent_index, mode,
        train_mode=train_mode_Sens)
    type_insens_score = getCombineScore_InSens.getCombineScore(
        candidates, EtoV_sent, VtoE_sent, list_lambda, sent_index,
        train_mode=train_mode_InSens)
    combined_scores = {
        'TypeSens': type_sens_score,
        'TypeInSens': type_insens_score,
    }

    ne_pairs = getFinalRes.getFinalNEPair(combined_scores, candidates,
                                          sent_index)
    return utilities.make_unique(ne_pairs)
Exemplo n.º 3
0
def evaluateSentencePair(predict, true_set):
    '''
    Count the true positives of one sentence pair.

    A predicted pair is a true positive for a gold pair when the first and
    last elements of its span 0 equal those of the gold span 0 AND the
    first and last elements of its span 1 equal those of the gold span 1.
    Only span boundaries are compared, never the elements in between.

    predict: predicted pairs; de-duplicated with utilities.make_unique
             before comparison
    true_set: gold pairs; each pair is indexed as pair[0] / pair[1]
    Returns: number of (gold pair, predicted pair) matches as an int

    Pairs with an empty span are skipped on both sides. Previously only
    empty predicted spans were skipped, and an empty gold span raised
    IndexError.
    '''
    tp = 0
    # sorted() also materialises the result of make_unique, so it can be
    # iterated once per gold pair below.
    predict = sorted(utilities.make_unique(predict), key=lambda tmp: tmp[0])

    for true_pair in true_set:
        true_en = true_pair[0]
        if len(true_en) < 1:
            # Empty gold span: there is no boundary to compare against.
            continue
        true_en_begin = true_en[0]
        true_en_end = true_en[-1]

        for pair in predict:
            predict_en = pair[0]
            if len(predict_en) < 1:
                continue
            if not (predict_en[0] == true_en_begin
                    and predict_en[-1] == true_en_end):
                continue

            # Span 0 matches; span 1 must match on both boundaries as well.
            predict_vi = pair[1]
            true_vi = true_pair[1]
            if len(predict_vi) < 1 or len(true_vi) < 1:
                continue
            if true_vi[0] == predict_vi[0] and true_vi[-1] == predict_vi[-1]:
                tp += 1
    return tp
Exemplo n.º 4
0
def _printMismatchDiagnostics(true_pair, pair, mode):
    '''
    Print debugging output for a predicted pair whose span 0 matched the
    gold pair but whose span 1 did not.

    pair[-2] is passed to every lookup as its first argument and pair[-1]
    as the candidate index for the score lookup.
    NOTE(review): presumably pair[-2] is the sentence index and pair[-1]
    the index of the chosen candidate - confirm against the producer of
    `predict`.
    '''
    sent_index = pair[-2]
    candidateset = CandidateSet.getCandidateSetFromFile(sent_index, mode)
    # Dump every candidate together with its type-insensitive score.
    for k, candidate in enumerate(candidateset):
        print('#####################')
        print('Candidate', candidate)
        print('Score',
              ScoreTable.getScoreforOneCandidate_TypeInSens(sent_index, k))

    print('--------------')
    print('True', true_pair)
    print('Predict', pair)
    print('Score',
          ScoreTable.getScoreforOneCandidate_TypeInSens(sent_index, pair[-1]))
    print('Spacy Vi', VtoE_model.getEntList_Spacy_FromFile(sent_index))
    print('Spacy En', EtoV_model.getEntList_Spacy_FromFile(sent_index))
    print('Stanford En',
          EtoV_model.getEntList_StanfordNER_FromFile(sent_index))
    print('Stanford Vi',
          VtoE_model.getEntList_StanfordNER_FromFile(sent_index))


def evaluateSentencePair(predict, true_set, mode):
    '''
    Count the true positives of one sentence pair and print diagnostics
    for near misses.

    A predicted pair is a true positive for a gold pair when the first and
    last elements of its span 0 equal those of the gold span 0 AND the
    first and last elements of its span 1 equal those of the gold span 1.
    When span 0 matches but span 1 does not, the candidate set, scores and
    the Spacy / Stanford entity lists are printed for inspection.

    predict: predicted pairs; de-duplicated with utilities.make_unique
             before comparison
    true_set: gold pairs; each pair is indexed as pair[0] / pair[1]
    mode: forwarded to CandidateSet.getCandidateSetFromFile
    Returns: number of (gold pair, predicted pair) matches as an int

    Pairs with an empty span are skipped on both sides. Previously only
    empty predicted spans were skipped, and an empty gold span raised
    IndexError.
    '''
    print('==================')
    tp = 0
    # sorted() fixes the order in which diagnostics are printed and also
    # materialises the result of make_unique for repeated iteration.
    predict = sorted(utilities.make_unique(predict), key=lambda tmp: tmp[0])

    for true_pair in true_set:
        true_en = true_pair[0]
        if len(true_en) < 1:
            # Empty gold span: there is no boundary to compare against.
            continue
        true_en_begin = true_en[0]
        true_en_end = true_en[-1]

        for pair in predict:
            predict_en = pair[0]
            if len(predict_en) < 1:
                continue
            if not (predict_en[0] == true_en_begin
                    and predict_en[-1] == true_en_end):
                continue

            # Span 0 matches; span 1 decides between hit and near miss.
            predict_vi = pair[1]
            true_vi = true_pair[1]
            if len(predict_vi) < 1 or len(true_vi) < 1:
                continue
            if true_vi[0] == predict_vi[0] and true_vi[-1] == predict_vi[-1]:
                tp += 1
            else:
                _printMismatchDiagnostics(true_pair, pair, mode)
    return tp