예제 #1
0
def main():
    """Manual smoke test for candidate lookup and transliteration scoring.

    Reads both test alignment files, prints element 7 of whatever
    ``CandidateSet.getCandidateSetFromFile(2)`` returns, then prints the
    value ``Transliteration.getTransliterationProb`` yields for one
    hand-written entity pair.
    """
    # Both alignment lists are loaded but not consumed below; they were the
    # inputs of a (now disabled) CandidateSet.createCandidateSet(..., 'test').
    etov_test_sents = utilities.read_align_file(
        '../../../Alignment_Split/EtoV_Test.txt')
    vtoe_test_sents = utilities.read_align_file(
        '../../../Alignment_Split/VtoE_Test.txt')

    candidates = CandidateSet.getCandidateSetFromFile(2)
    print('Candidate', candidates[7], 'test')

    # Hand-written sample pair; presumably (source span, target span,
    # source type, source text, target text, target type, ...) -- confirm
    # the field layout against the candidate-set producer.
    sample_pair = (
        [10, 11],
        [12, 13, 14, 15],
        'PERSON',
        'Richard Matsch',
        'Thẩm phán Richard Matsch',
        'PERSON',
        2,
        1,
    )
    translit_prob = Transliteration.getTransliterationProb(sample_pair, '')
    print(translit_prob)
예제 #2
0
def main():
    """Manual check of entity-table creation and alignment scoring.

    Builds the Stanford and spaCy entity-list tables, scores every
    source/target entity pair of the first EtoV dev sentence, and finally
    prints the shape of the module-level ``alignment_table``.
    """
    createEntListTable_Stanford()
    createEntListTable_Spacy()

    dev_sentences = utilities.read_align_file(
        '../../Alignment_Split/EtoV_Dev.txt')
    first_sentence = dev_sentences[0]
    source_sent = first_sentence['Source']
    target_sent = first_sentence['Target']

    source_entities = getEntList_StanfordNER_FromFile(0)
    target_entities = getTargetEntList(source_sent, target_sent,
                                       source_entities)

    # The scores themselves are discarded; the call is made once per source
    # entity, paired by position with the target entity list.
    for position, source_entity in enumerate(source_entities):
        getAlignScore(source_entity, target_entities[position], 0)

    # Column copies kept from the original debugging session; nothing below
    # reads them.
    vn_column = list(alignment_table.VN)
    en_column = list(alignment_table.EN)
    prob_column = list(alignment_table.Prob)

    print(alignment_table.shape)
예제 #3
0
def main():
    """Manual check of ``getEntSetFromFile`` on the first VtoE dev sentence.

    Builds the Stanford and spaCy entity-list tables, loads the VtoE dev
    alignment file and requests the entity set for sentence index 0. The
    result is not printed or returned.
    """
    createEntListTable_Stanford()
    createEntListTable_Spacy()

    dev_sentences = utilities.read_align_file(
        '../../Alignment_Split/VtoE_Dev.txt')
    first_sentence = dev_sentences[0]

    entity_pairs = getEntSetFromFile(
        first_sentence['Source'],
        first_sentence['Target'],
        0,
    )
예제 #4
0
def main():
    """Call ``getNETranslationProb`` on one hand-written entity pair.

    The pair is scored against the first entry of the VtoE dev alignment
    file; the resulting value is neither printed nor returned.
    """
    sample_pair = [
        [11, 12],
        [15, 16],
        "LOCATION",
        "El Nino",
        "El Nino",
        "LOCATION",
    ]
    dev_sentences = read_align_file('../../../Alignment_Split/VtoE_Dev.txt')
    first_sentence = dev_sentences[0]
    translation_prob = getNETranslationProb(sample_pair, first_sentence)
예제 #5
0
import evaluate_TypeSens
import config
import TrueSet
import ScoreTable
import CandidateSet

# Module-level training setup. Everything below runs at import time,
# including reading both dev alignment files and building the true set.

# Keys of the weights to be tuned -- presumably they match entries of the
# lambda/weight dictionary used elsewhere; confirm against the config file.
lambda_list_to_update = ['mono_en','mono_vn','bi']

# Presumably the increment applied to a weight per tuning step -- confirm
# where it is consumed (not visible in this chunk).
lambda_step = 0.1

# Counter initialised to zero; its consumer is outside the visible code.
cur_Count = 0
# Dev-set paths taken from the project config module.
dev_file_EtoV = config.align_file_EtoV_dev
dev_file_VtoE = config.align_file_VtoE_dev
dev_file_en = config.en_file_dev
dev_file_vn = config.vn_file_dev
# Alignment data for both directions, loaded eagerly.
# NOTE(review): read_align_file is not imported in the visible lines --
# confirm it is brought into scope above this chunk.
dev_list_EtoV = read_align_file(dev_file_EtoV)
dev_list_VtoE = read_align_file(dev_file_VtoE)
config_file = 'config.ini'

# Reference data built from the English and Vietnamese dev files.
trueSet = TrueSet.getFileTrueSet(dev_file_en,dev_file_vn)

#MAIN

def train_dev(list_lambda):    
    """[summary]
    
    Arguments:
        list_lambda {[float]} -- [weight of features]
    output: 
        Final Res of that list of lambda
        res =
예제 #6
0
'''

from AlignmentModel import EtoV_model, VtoE_model
import utilities
import json
import os.path
import config

# Candidate-set file locations, taken from the project config module.
candidate_set_test_file = config.candidate_set_test_file
candidate_set_dev_file = config.candidate_set_dev_file

# Start out empty; presumably filled on first use by code outside this
# chunk -- confirm where these are assigned.
Candidate_Set_Table_Dev = None
Candidate_Set_Table_Test = None

# Dev alignment data for both directions, read eagerly at import time.
EtoV_Dev = utilities.read_align_file(config.align_file_EtoV_dev)
VtoE_Dev = utilities.read_align_file(config.align_file_VtoE_dev)

# Test alignment data for both directions, also read at import time.
EtoV_Test = utilities.read_align_file(config.align_file_EtoV_test)
VtoE_Test = utilities.read_align_file(config.align_file_VtoE_test)


def createCandidateSet(EtoV_List, VtoE_List, mode):
    '''
    Create CandidateSet file.

    The visible part of this function builds the entity-list tables for
    both alignment directions: first the Stanford tables (EtoV, then VtoE),
    then the spaCy tables (EtoV, then VtoE). ``mode`` is forwarded unchanged
    to every builder; ``EtoV_List`` and ``VtoE_List`` are not used in the
    portion shown here.
    '''
    direction_models = (EtoV_model, VtoE_model)

    for model in direction_models:
        model.createEntListTable_Stanford(mode)

    for model in direction_models:
        model.createEntListTable_Spacy(mode)
예제 #7
0
# Module-level test setup; the alignment files are read at import time.
# NOTE(review): align_file_EtoV is used below but defined outside the
# visible chunk -- confirm it is assigned above this line.
align_file_VtoE = '../../Alignment_Split/VtoE_Test.txt'
test_file_en = '../../Data/corpora/0_DATA/3_Test/test_eng'
test_file_vn = '../../Data/corpora/0_DATA/3_Test/test_viet'

config_file = 'config.ini'

outputfile = ''
# Feature weights as returned by the project config module.
list_lambda = config.getWeight()
# Disabled: explicit float conversion of every weight.
# for key,value in list_lambda.items():
#     list_lambda[key] = float(list_lambda[key])

# Disabled: debug dump of each weight and its type.
# for key,value in list_lambda.items():
#     print(list_lambda[key])
#     print(type(list_lambda[key]))

# Test alignment data for both directions.
test_list_EtoV = read_align_file(align_file_EtoV)
test_list_VtoE = read_align_file(align_file_VtoE)


def main(lambda_list_to_update):
    """Fetch the best weights, then rebuild the test candidate set and score tables.

    Arguments:
        lambda_list_to_update -- passed unchanged to
            ``training_TypeInSens.getBestLambda``; printed first for tracing.

    The visible body prints progress messages and the obtained weights and
    has no return statement. NOTE(review): this definition may continue
    beyond the visible chunk.
    """
    print(lambda_list_to_update)
    # Local name: shadows the module-level ``list_lambda`` inside this function.
    list_lambda = training_TypeInSens.getBestLambda(lambda_list_to_update)

    # Regenerate the candidate set for the test split before scoring.
    CandidateSet.createCandidateSet(test_list_EtoV, test_list_VtoE, 'test')
    print("Create Candidate Set")
    # Build both score-table variants from the same test alignment lists.
    ScoreTable.createScoreTable_TypeInSens(test_list_EtoV, test_list_VtoE,
                                           'test')
    ScoreTable.createScoreTable_TypeSens(test_list_EtoV, test_list_VtoE,
                                         'test')
    print("Created Score Table")
    print(list_lambda)