def main():
    # load the parameters from the command line
    opt = parser.parse_args()
    # run BEER to get sentence-level scores for s1 and s2
    beer1 = "./code/beer_2.0/beer -s " + opt.s1 + " -r " + opt.ref + " --printSentScores > ./tmp/tmp1"
    beer2 = "./code/beer_2.0/beer -s " + opt.s2 + " -r " + opt.ref + " --printSentScores > ./tmp/tmp2"
    print(beer1)
    print(beer2)
    os.system(beer1)
    os.system(beer2)
    # extract the scores from the files; drop the last line of each tmp file, which holds the overall score
    tmp_sc1 = [
        float(li.strip('\n').split(' ')[-1]) for li in open('./tmp/tmp1')
    ]
    tmp_sc1.pop(-1)
    tmp_sc2 = [
        float(li.strip('\n').split(' ')[-1]) for li in open('./tmp/tmp2')
    ]
    tmp_sc2.pop(-1)
    # transform the DA scores into RR labels (darr); the human RR scores are read below as rr
    darr = daToRr(tmp_sc1, tmp_sc2, float(opt.threshold))
    writefile(darr, './tmp/darr')
    # read the human scores
    rr = [int(li.rstrip('\n')) for li in open(opt.scores)]
    taur = valTauLike(rr, darr)
    writefile(taur, './tmp/taur', 'a')
    print(taur)
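
The helpers daToRr and writefile are not part of this listing. A minimal sketch of what the DA-to-RR conversion presumably does, assuming 1/0/-1 labels and that a score gap within the threshold counts as a tie; the sign convention (1 meaning system 1 is better) is an assumption, not taken from the source:

# Sketch only: map two lists of DA scores to relative-ranking labels,
# treating score gaps within the threshold as ties. Sign convention assumed.
def daToRr(scores1, scores2, threshold):
    rr = []
    for a, b in zip(scores1, scores2):
        if a - b > threshold:
            rr.append(1)       # system 1 judged better
        elif b - a > threshold:
            rr.append(-1)      # system 2 judged better
        else:
            rr.append(0)       # tie within the threshold
    return rr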
Example #2
def _calcu_fitness(self):
    # data paths specific to this experiment
    scores = '/Users/ihuangyiran/Documents/Workplace_Python/data/MasterArbeit/plan_c_source_rank_de.en/extracted_data_2016/data_scores'
    tgt = [int(li.rstrip('\n')) for li in open(scores)]
    root = '/tmp/decMixture_2016_'
    bs, nd = np.load(root + 's1_0.npy').shape
    s1 = np.zeros([bs, nd])
    s2 = np.zeros([bs, nd])
    ref = np.zeros([bs, nd])
    values = [
        self.genes[i].get_value() for i in range(self.num_genes)
        if self.genes[i].get_key()
    ]
    s = sum(values)
    for i in range(self.num_genes):
        if self.genes[i].get_key():
            ratio = self.genes[i].get_value()
            s1 += (ratio * 1. / s) * np.load(root + 's1_' + str(i) + '.npy')
            s2 += (ratio * 1. / s) * np.load(root + 's2_' + str(i) + '.npy')
            ref += (ratio * 1. / s) * np.load(root + 'ref_' + str(i) + '.npy')
    d1 = [np.linalg.norm(l1 - l2, ord=1) for l1, l2 in zip(s1, ref)]
    d2 = [np.linalg.norm(l1 - l2, ord=1) for l1, l2 in zip(s2, ref)]
    c = [self._compare(l1, l2) for l1, l2 in zip(d1, d2)]
    taul = valTauLike(tgt, c)
    #print(taul)
    return taul
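
valTauLike is used throughout these examples but not defined in the listing. A sketch of a WMT-style tau-like correlation between human relative-ranking labels and metric-derived labels; the exact handling of ties is an assumption:

# Sketch of a tau-like correlation, assuming the usual WMT-style definition:
# (concordant - discordant) / (concordant + discordant), with pairs the humans
# judged as ties (label 0) skipped. How the original handles ties is unknown.
def valTauLike(human_labels, metric_labels):
    concordant = discordant = 0
    for h, m in zip(human_labels, metric_labels):
        if h == 0:
            continue
        if h == m:
            concordant += 1
        else:
            discordant += 1
    total = concordant + discordant
    return (concordant - discordant) / float(total) if total else 0.0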
def main():
    # load the parameters from the command line
    opt = parser.parse_args()
    # run get_embeddings to get the word embeddings for the input
    comm_ref= "python /Users/ihuangyiran/Documents/Workplace_Python/MasterArbeit/get_vector_from_sentence/get_embeddings.py -model "+opt.model+" -type "+opt.type+" -src "+opt.ref+" -output /tmp/data_ref"
    comm_s1 = "python /Users/ihuangyiran/Documents/Workplace_Python/MasterArbeit/get_vector_from_sentence/get_embeddings.py -model "+opt.model+" -type "+opt.type+" -src "+opt.s1+" -output /tmp/data_s1"
    comm_s2 = "python /Users/ihuangyiran/Documents/Workplace_Python/MasterArbeit/get_vector_from_sentence/get_embeddings.py -model "+opt.model+" -type "+opt.type+" -src "+opt.s2+" -output /tmp/data_s2"
    if opt.type == 'decoder_hidden':
        comm_ref= "python /Users/ihuangyiran/Documents/Workplace_Python/MasterArbeit/get_vector_from_sentence/get_embeddings.py -model "+opt.model+" -type "+opt.type+" -src "+opt.src+" -output /tmp/data_ref -tgt " + opt.ref
        comm_s1 = "python /Users/ihuangyiran/Documents/Workplace_Python/MasterArbeit/get_vector_from_sentence/get_embeddings.py -model "+opt.model+" -type "+opt.type+" -src "+opt.src+" -output /tmp/data_s1 -tgt " + opt.s1
        comm_s2 = "python /Users/ihuangyiran/Documents/Workplace_Python/MasterArbeit/get_vector_from_sentence/get_embeddings.py -model "+opt.model+" -type "+opt.type+" -src "+opt.src+" -output /tmp/data_s2 -tgt " + opt.s2

    print(comm_ref)
    print(comm_s1)
    print(comm_s2)
    os.system(comm_ref)
    os.system(comm_s1)
    os.system(comm_s2)

    # run test3.py to process the word embeddings and score each segment
    print (">>> start getting the chrF scores")
    chrF1= "python code/NNMetric/test3.py -hyp /tmp/data_s1 -ref /tmp/data_ref -join " + opt.mode + " > ./tmp/tmp1"
    chrF2= "python code/NNMetric/test3.py -hyp /tmp/data_s2 -ref /tmp/data_ref -join " + opt.mode + " > ./tmp/tmp2"
#    chrF1= "python code/NNMetric/test.py -hyp " + opt.s1 + " -ref " + opt.ref + "  -w2v data/word2vec/GoogleNews-vectors-negative300-SLIM.bin.gz -join max > ./tmp/tmp1"
#    chrF2= "python code/NNMetric/test.py -hyp " + opt.s2 + " -ref " + opt.ref + "  -w2v data/word2vec/GoogleNews-vectors-negative300-SLIM.bin.gz -join max > ./tmp/tmp2"
    print (chrF1)
    print (chrF2)
    os.system(chrF1)
    os.system(chrF2)
    print ("<<< finish getting the chrF score and store then in tmp file")
    # extract the score from the file and remove the last three line of the tmp file, it contains the total valuation infos
    print (">>> read the score oben and compare the result")
    tmp_sc1 = [float(li.rstrip('\n')[1:-1].split(' ')[-1]) for li in open('./tmp/tmp1')]
    tmp_sc2 = [float(li.rstrip('\n')[1:-1].split(' ')[-1]) for li in open('./tmp/tmp2')]
    # clean up the temporary embedding files
    os.system('rm /tmp/data_ref')
    os.system('rm /tmp/data_s1')
    os.system('rm /tmp/data_s2')
    assert(len(tmp_sc1) == len(tmp_sc2))
    def _compare(a,b):
        if a>b:
            return 1
        elif a<b:
            return -1
        else:
            return 0
    zip_sc = zip(tmp_sc1, tmp_sc2)
    tmp_rs = [_compare(sc1,sc2) for sc1, sc2 in zip_sc]
    print ("<<< finish comparing")
    # get the target data and calculate the tau like corr
    print (">>> read target data and calculate the tau like correlation")
    tgt_rs = [int(li.rstrip('\n')) for li in open(opt.scores)]
    taul = valTauLike(tgt_rs, tmp_rs)
    print ("<<< finish.")
    print (taul)
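
The slicing idiom li.rstrip('\n')[1:-1].split(' ')[-1] above assumes each output line is wrapped in a bracket pair with the score as the last space-separated token; the concrete line format is inferred from the code, not documented in the source. A worked example under that assumption:

# Worked example of the parsing idiom used above (line format is inferred)
line = "(17 0.4325)\n"
score = float(line.rstrip('\n')[1:-1].split(' ')[-1])
print(score)  # 0.4325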
Example #4
def evaluate_tau_like(arr1, arr2):
    """
    arr1 comes from the model
    arr2 comes from the target file
    """
    a1 = arr1.cpu()
    a2 = arr2.cpu()
    a1 = a1.data.numpy()
    a2 = a2.data.numpy()
    a1 = list(map(result_transform_sf_to_score, a1))
    a2 = a2 - 1
    taul = valTauLike(a2, a1) # a2 should go first
    return taul
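
result_transform_sf_to_score is not shown in the listing either. Given that the targets are shifted with a2 = a2 - 1, it presumably maps a 3-way softmax row to its argmax shifted into {-1, 0, 1}; a minimal sketch under that assumption:

import numpy as np

# Assumption: pick the most likely of the 3 classes and shift {0, 1, 2} to
# {-1, 0, 1}, so the result is comparable to the shifted targets above.
def result_transform_sf_to_score(softmax_row):
    return int(np.argmax(softmax_row)) - 1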
def main():
    # load the parameters from the command line
    opt = parser.parse_args()
    # tokenize the inputs and run the Moses sentence-bleu scorer on system one and system two
    print(">>> start getting the sentence-level BLEU scores")
    comm1 = 'cat ' + opt.s1 + ' |$MOSESROOT/scripts/tokenizer/tokenizer.perl > /tmp/tokenized_s1'
    comm2 = 'cat ' + opt.s2 + ' |$MOSESROOT/scripts/tokenizer/tokenizer.perl > /tmp/tokenized_s2'
    comm3 = 'cat ' + opt.ref + ' |$MOSESROOT/scripts/tokenizer/tokenizer.perl > /tmp/tokenized_ref'
    print(comm1)
    print(comm2)
    print(comm3)
    os.system(comm1)
    os.system(comm2)
    os.system(comm3)
    comm4 = 'cat /tmp/tokenized_s1 |$MOSESROOT/mert/sentence-bleu /tmp/tokenized_ref > ./tmp/tmp1'
    comm5 = 'cat /tmp/tokenized_s2 |$MOSESROOT/mert/sentence-bleu /tmp/tokenized_ref > ./tmp/tmp2'
    print(comm4)
    print(comm5)
    os.system(comm4)
    os.system(comm5)

    print("<<< finish getting the chrF score and store then in tmp file")
    # extract the score from the file and remove the last three line of the tmp file, it contains the total valuation infos
    print(">>> read the score oben and compare the result")
    tmp_sc1 = [
        float(li.rstrip('\n').split('\t')[-1]) for li in open('./tmp/tmp1')
    ]
    tmp_sc2 = [
        float(li.rstrip('\n').split('\t')[-1]) for li in open('./tmp/tmp2')
    ]
    # compare the scores of the two systems
    assert (len(tmp_sc1) == len(tmp_sc2))

    def _compare(a, b):
        if a > b:
            return 1
        elif a < b:
            return -1
        else:
            return 0

    zip_sc = zip(tmp_sc1, tmp_sc2)
    tmp_rs = [_compare(sc1, sc2) for sc1, sc2 in zip_sc]
    print("<<< finish comparing")
    # get the target data and calculate the tau like corr
    print(">>> read target data and calculate the tau like correlation")
    tgt_rs = [int(li.rstrip('\n')) for li in open(opt.scores)]
    taul = valTauLike(tgt_rs, tmp_rs)
    print("<<< finish.")
    print(taul)
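
os.system silently ignores failures of the Moses tokenizer or the sentence-bleu binary. A sketch of the same shell pipeline run through subprocess instead, so a non-zero exit code raises an error; the paths are placeholders and $MOSESROOT is the same assumption as above:

import subprocess

def run(cmd):
    # run a shell command and raise if it exits non-zero
    print(cmd)
    subprocess.run(cmd, shell=True, check=True)

# placeholder paths; $MOSESROOT must point at a Moses installation
run('cat hyp.txt | $MOSESROOT/scripts/tokenizer/tokenizer.perl > /tmp/tokenized_hyp')
run('cat /tmp/tokenized_hyp | $MOSESROOT/mert/sentence-bleu /tmp/tokenized_ref > ./tmp/tmp1')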
Example #6
def main():
    # load the parameters from the command line
    opt = parser.parse_args()
    # run the NN metric (test2.py) to get a score for system one and system two
    #print (">>> start getting the chrF scores")
    chrF1 = "python code/NNMetric/test2.py -hyp " + opt.s1 + " -ref " + opt.ref + " -type " + opt.type + " > ./tmp/tmp1"
    chrF2 = "python code/NNMetric/test2.py -hyp " + opt.s2 + " -ref " + opt.ref + " -type " + opt.type + " > ./tmp/tmp2"
    #    chrF1= "python code/NNMetric/test.py -hyp " + opt.s1 + " -ref " + opt.ref + "  -w2v data/word2vec/GoogleNews-vectors-negative300-SLIM.bin.gz -join max > ./tmp/tmp1"
    #    chrF2= "python code/NNMetric/test.py -hyp " + opt.s2 + " -ref " + opt.ref + "  -w2v data/word2vec/GoogleNews-vectors-negative300-SLIM.bin.gz -join max > ./tmp/tmp2"
    #print (chrF1)
    #print (chrF2)
    os.system(chrF1)
    os.system(chrF2)
    #print ("<<< finish getting the chrF score and store then in tmp file")
    # extract the scores from the tmp files
    #print (">>> read the scores above and compare the results")
    tmp_sc1 = [
        float(li.rstrip('\n')[1:-1].split(' ')[-1])
        for li in open('./tmp/tmp1')
    ]
    tmp_sc2 = [
        float(li.rstrip('\n')[1:-1].split(' ')[-1])
        for li in open('./tmp/tmp2')
    ]
    #tmp_sc1.pop(0)
    #tmp_sc2.pop(0)
    #for i in range(3):
    #    tmp_sc1.pop(-1)
    #    tmp_sc2.pop(-1)
    # compare the scores of the two systems
    assert (len(tmp_sc1) == len(tmp_sc2))

    def _compare(a, b):
        if a > b:
            return 1
        elif a < b:
            return -1
        else:
            return 0

    zip_sc = zip(tmp_sc1, tmp_sc2)
    tmp_rs = [_compare(sc1, sc2) for sc1, sc2 in zip_sc]
    #print ("<<< finish comparing")
    # get the target data and calculate the tau like corr
    #print (">>> read target data and calculate the tau like correlation")
    tgt_rs = [int(li.rstrip('\n')) for li in open(opt.scores)]
    taul = valTauLike(tgt_rs, tmp_rs)
    #print ("<<< finish.")
    print(opt.ref + '_' + opt.type, taul)
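
Each of these main() functions relies on a module-level argparse parser that is not part of the listing. A plausible setup, inferred from the opt.* attributes the scoring scripts read; the flag names and defaults are assumptions:

import argparse

# Inferred from the opt.* attributes used above; flag names are assumptions.
parser = argparse.ArgumentParser()
parser.add_argument('-s1', help='output file of system 1')
parser.add_argument('-s2', help='output file of system 2')
parser.add_argument('-ref', help='reference translations')
parser.add_argument('-src', help='source sentences (needed for decoder_hidden)')
parser.add_argument('-scores', help='human relative-ranking labels, one per line')
parser.add_argument('-model', help='model path passed to get_embeddings.py')
parser.add_argument('-type', help='embedding type, e.g. decoder_hidden')
parser.add_argument('-mode', help='join mode passed to the NN metric')
parser.add_argument('-threshold', default='0', help='DA score gap treated as a tie')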
Example #7
def evaluate_tau_like(model, src, tgt):
    """
    arr1 is the output of the model, and arr2 is the tgt
    so we should put the arr1 in the second parameter seat.
    """
    arr1 = predict(model, src)
    arr1 = arr1.numpy()
    arr2 = tgt.numpy()
    if arr1.shape[1] == 3:
        arr1 = list(map(result_transform_sf_to_score, arr1))
        arr2 = arr2 - 1
    else:
        arr1 = list(map(lambda x: round(x), arr1))
    taul = valTauLike(arr2, arr1)
    return taul
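
predict(model, src) is also assumed rather than shown; presumably it is a plain forward pass with gradients disabled. A sketch under that assumption:

import torch

# Assumption: predict just runs the model on a batch and returns a CPU tensor.
def predict(model, src):
    model.eval()
    with torch.no_grad():
        return model(src).cpu()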
Example #8
def main():
    # load the parameters from the command line
    opt = parser.parse_args()
    # run chrF++ to get the chrF score for system one and system two
    print(">>> start getting the chrF scores")
    chrF1 = "python ./code/chrF/chrF++.py -H " + opt.s1 + " -R " + opt.ref + " -nw 0  -b 3 -s > ./tmp/tmp1"
    chrF2 = "python ./code/chrF/chrF++.py -H " + opt.s2 + " -R " + opt.ref + " -nw 0  -b 3 -s > ./tmp/tmp2"
    print(chrF1)
    print(chrF2)
    os.system(chrF1)
    os.system(chrF2)
    print("<<< finish getting the chrF score and store then in tmp file")
    # extract the score from the file and remove the last three line of the tmp file, it contains the total valuation infos
    print(">>> read the score oben and compare the result")
    tmp_sc1 = [
        float(li.rstrip('\n').split('\t')[-1]) for li in open('./tmp/tmp1')
    ]
    tmp_sc2 = [
        float(li.rstrip('\n').split('\t')[-1]) for li in open('./tmp/tmp2')
    ]
    tmp_sc1.pop(0)
    tmp_sc2.pop(0)
    for i in range(3):
        tmp_sc1.pop(-1)
        tmp_sc2.pop(-1)
    # compare the scores of the two systems
    assert (len(tmp_sc1) == len(tmp_sc2))

    def _compare(a, b):
        if a > b:
            return 1
        elif a < b:
            return -1
        else:
            return 0

    zip_sc = zip(tmp_sc1, tmp_sc2)
    tmp_rs = [_compare(sc1, sc2) for sc1, sc2 in zip_sc]
    print("<<< finish comparing")
    # get the target data and calculate the tau like corr
    print(">>> read target data and calculate the tau like correlation")
    tgt_rs = [int(li.rstrip('\n')) for li in open(opt.scores)]
    taul = valTauLike(tgt_rs, tmp_rs)
    print("<<< finish.")
    print(taul)
Example #9
def evaluate_tau_like(model, src, tgt):
    arr1 = predict(model, src)
    arr1 = arr1.numpy()
    arr2 = tgt.numpy()
    if arr1.shape[1] == 3:
        arr1 = list(map(result_transform_sf_to_score, arr1))
        arr2 = arr2 - 1
    else:
        arr1 = list(map(lambda x: round(x), arr1))
    taul = valTauLike(arr1, arr2)
    return taul
def evaluate_corr(model, src, tgt):
    # Inferred: this block appears to be the body of the evaluate_corr helper
    # called in test_da_model_with_rr_data below; the function name and
    # signature are an assumption, not taken from the source.
    arr1 = predict(model, src)
    arr1 = arr1.numpy()
    arr2 = tgt.numpy()
    if opt.rank:
        if arr1.shape[1] == 3:
            # softmax output
            arr1 = numpy.array(list(map(result_transform_sf_to_score, arr1)))
            arr2 = arr2 - 1
        else:
            arr1 = numpy.array(list(map(lambda x: round(x), arr1)))
    corr = stats.spearmanr(arr1, arr2)[0]
    return corr
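
The snippets above also depend on module-level imports that are not part of this listing; presumably something along these lines (numpy appears both as numpy and as np in different examples):

import os
import re
import numpy
import numpy as np
import torch
from scipy import stats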
Example #10
def test_da_model_with_rr_data(opt, model, data):
    """
    train a da model and test this model with rr data,
        use the s1+ref and s2+ref to get the scores seperantly and compare the score the get the darr result
    input:
        model: a da model
        src: da data 
        tgt: rr data
    """
    # reload the data to do the test
    tgt = "../data/MasterArbeit/plan_c_rank_de.en/train_result"
    src_sys = "../data/MasterArbeit/plan_c_rank_de.en/train_s1_hidden"
    src_sys2 = "../data/MasterArbeit/plan_c_rank_de.en/train_s2_hidden"
    src_ref = "../data/MasterArbeit/plan_c_rank_de.en/train_ref_hidden"
    tgt_val = "../data/MasterArbeit/plan_c_rank_de.en/train_result"
    src_val_sys = "../data/MasterArbeit/plan_c_rank_de.en/train_s1_hidden"
    src_val_sys2 = "../data/MasterArbeit/plan_c_rank_de.en/train_s2_hidden"
    src_val_ref = "../data/MasterArbeit/plan_c_rank_de.en/train_ref_hidden"
    tgt_test = "../data/MasterArbeit/plan_c_rank_de.en/test_result"
    src_test_sys = "../data/MasterArbeit/plan_c_rank_de.en/test_s1_hidden"
    src_test_sys2 = "../data/MasterArbeit/plan_c_rank_de.en/test_s2_hidden"
    src_test_ref = "../data/MasterArbeit/plan_c_rank_de.en/test_ref_hidden"
    rank = True
    sf_output = False
    data.reload_data(src_sys, src_sys2, src_ref, tgt, src_val_sys,
                     src_val_sys2, src_val_ref, tgt_val, src_test_sys,
                     src_test_sys2, src_test_ref, tgt_test, rank, sf_output)
    # compute the scores
    num_train, num_val, num_test = data.get_nu_batch()
    if not rank:
        #    if True:
        out_corr = 0  # accumulate the per-batch correlation
        for i in range(num_test):
            src, tgt = data.get_test_batch()
            if opt.cuda == "True":
                tgt = tgt.cuda()
            corr = evaluate_corr(model, src, tgt)
            out_corr = out_corr + corr
            tmp_corr = "%d,%f" % (i, corr)
            # file_test is assumed to be an open results file from the surrounding scope
            file_test.write(tmp_corr)
            file_test.write('\n')
    else:
        src = []
        tgt = []
        for i in range(num_test):
            tmp_src, tmp_tgt = data.get_test_batch()
            src.append(tmp_src)
            tgt.append(tmp_tgt)
        src = torch.cat(src)
        tgt = torch.cat(tgt)
        print(src.shape, tgt.shape)
        ref, s1, s2 = src.split(500, 1)
        src1 = torch.cat((s1, ref), 1)
        src2 = torch.cat((s2, ref), 1)
        o1 = predict(model, src1)
        o2 = predict(model, src2)
        print(o1.shape, o2.shape)
        o1 = o1.squeeze().numpy().tolist()
        o2 = o2.squeeze().numpy().tolist()
        rr = daToRr(o1, o2, 0)
        taul = valTauLike(tgt, rr)
    return taul
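
The split in the rank branch assumes each row of src is the concatenation [ref | s1 | s2] with 500 dimensions per block; the block size and the order come from the code above, while the batch size below is arbitrary. A tiny shape check of that layout:

import torch

src = torch.randn(4, 1500)             # 4 segments, three 500-dim hidden states each
ref, s1, s2 = src.split(500, 1)        # -> three (4, 500) tensors
src1 = torch.cat((s1, ref), 1)         # system 1 paired with the reference, (4, 1000)
src2 = torch.cat((s2, ref), 1)
print(ref.shape, src1.shape, src2.shape)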
def main():
    # load the parameters from the command line
    opt = parser.parse_args()
    # extract the year and the language pair from the s1 file name
    get_date = re.compile(r'2\d+')
    get_lan = re.compile(r'[a-z]{2}\.[a-z]{2}')
    date = get_date.search(opt.s1).group(0)
    lanp = get_lan.search(opt.s1).group(0)
    # set tmp name for the get_embeddings output
    o_ref = '/tmp/' + lanp + date + opt.type + '.ref'
    o_s1 = '/tmp/' + lanp + date + opt.type + '.s1'
    o_s2 = '/tmp/' + lanp + date + opt.type + '.s2'
    # run get_embeddings to get the word embeddings for the input
    if (not os.path.exists(o_ref) or not os.path.exists(o_s1)
            or not os.path.exists(o_s2)):
        comm_ref = "python /Users/ihuangyiran/Documents/Workplace_Python/MasterArbeit/get_vector_from_sentence/get_embeddings.py -model " + opt.model + " -type " + opt.type + " -src " + opt.ref + " -output " + o_ref
        comm_s1 = "python /Users/ihuangyiran/Documents/Workplace_Python/MasterArbeit/get_vector_from_sentence/get_embeddings.py -model " + opt.model + " -type " + opt.type + " -src " + opt.s1 + " -output " + o_s1
        comm_s2 = "python /Users/ihuangyiran/Documents/Workplace_Python/MasterArbeit/get_vector_from_sentence/get_embeddings.py -model " + opt.model + " -type " + opt.type + " -src " + opt.s2 + " -output " + o_s2
        if opt.type == 'decoder_hidden' or opt.type == 'decoder_hidden_last':
            comm_ref = "python /Users/ihuangyiran/Documents/Workplace_Python/MasterArbeit/get_vector_from_sentence/get_embeddings.py -model " + opt.model + " -type " + opt.type + " -src " + opt.src + " -output " + o_ref + " -tgt " + opt.ref
            comm_s1 = "python /Users/ihuangyiran/Documents/Workplace_Python/MasterArbeit/get_vector_from_sentence/get_embeddings.py -model " + opt.model + " -type " + opt.type + " -src " + opt.src + " -output " + o_s1 + " -tgt " + opt.s1
            comm_s2 = "python /Users/ihuangyiran/Documents/Workplace_Python/MasterArbeit/get_vector_from_sentence/get_embeddings.py -model " + opt.model + " -type " + opt.type + " -src " + opt.src + " -output " + o_s2 + " -tgt " + opt.s2

        print(comm_ref)
        print(comm_s1)
        print(comm_s2)
        os.system(comm_ref)
        os.system(comm_s1)
        os.system(comm_s2)

    # run test6.py to process the word embeddings and score each segment
    print(">>> start getting the chrF scores")
    chrF1 = "python code/NNMetric/test6.py -hyp " + o_s1 + " -ref " + o_ref + " > /tmp/tmp1"
    chrF2 = "python code/NNMetric/test6.py -hyp " + o_s2 + " -ref " + o_ref + " >  /tmp/tmp2"
    print(chrF1)
    print(chrF2)
    os.system(chrF1)
    os.system(chrF2)
    print("<<< finish getting the chrF score and store then in tmp file")

    print(">>> read the score oben and compare the result")
    tmp_sc1 = [float(li.rstrip('\n')) for li in open('/tmp/tmp1')]
    tmp_sc2 = [float(li.rstrip('\n')) for li in open('/tmp/tmp2')]
    # sanity check: one score per segment from each system
    assert (len(tmp_sc1) == len(tmp_sc2))

    def _compare(a, b):
        if a > b:
            return 1
        elif a < b:
            return -1
        else:
            return 0

    zip_sc = zip(tmp_sc1, tmp_sc2)
    tmp_rs = [_compare(sc1, sc2) for sc1, sc2 in zip_sc]
    print("<<< finish comparing")
    # get the target data and calculate the tau like corr
    print(">>> read target data and calculate the tau like correlation")
    tgt_rs = [int(li.rstrip('\n')) for li in open(opt.scores)]
    taul = valTauLike(tgt_rs, tmp_rs)
    print("<<< finish.")
    print(taul)