Пример #1
0
            rec = rec_set[fid]
        result = calculator.calculate(lab, rec)
        acc = float(result['cor'] - result['ins']) * 100.0 / result['all']
        print('acc: %4.2f %%' % acc, end = ' ')
#        print('acc: {acc}'.format(acc = acc*100), end = ' ')
        print('N: {total}'.format(total = result['all']), end = ' ')
        print('C: {correct}'.format(correct = result['cor']), end = ' ')
        print('S: {substitution}'.format(substitution = result['sub']), end = ' ')
        print('D: {deletion}'.format(deletion = result['del']), end = ' ')
        print('I: {insertion}'.format(insertion = result['ins']))
        space = {}
        space['lab'] = []
        space['rec'] = []
        for idx in xrange(len(result['lab'])) :
            len_lab = len(result['lab'][idx])
            if is_chinese(result['lab'][idx]) :
                len_lab = len_lab * 2
            len_rec = len(result['rec'][idx])
            if is_chinese(result['rec'][idx]) :
                len_rec = len_rec * 2
            length = max(len_lab, len_rec)
            space['lab'].append(length-len_lab)
            space['rec'].append(length-len_rec)
        print('lab:', end = ' ')
        for idx, token in enumerate(result['lab']) :
            print('{token}'.format(token = token.encode('utf-8')), end = ' ')
            for n in xrange(space['lab'][idx]) : 
                print(' ', end = '')
        print()
        print('rec:', end = ' ')
        for idx, token in enumerate(result['rec']) :
# -*- coding: utf-8 -*-

from Accuracy import is_chinese
import re
import sys

if __name__ == '__main__':
    # Split the first field of every stdin line into two vocabularies:
    #   cluster['ch'] - individual characters of words is_chinese() accepts
    #   cluster['en'] - every other word, kept whole
    # Plain dicts with a dummy value of 1 serve as the de-duplicating
    # containers; the output order is whatever dict iteration yields.
    cluster = {'ch': {}, 'en': {}}
    mandarin = cluster['ch']
    english = cluster['en']

    for raw in sys.stdin:
        text = raw.decode('utf-8')
        # NOTE(review): the slice stops one character BEFORE the first
        # space; when the line has no space, find() returns -1 and the
        # last two characters are dropped instead. Confirm that this
        # matches the expected input format.
        cut = text.find(' ') - 1
        word = text[:cut]
        if not is_chinese(word):
            english[word] = 1
            continue
        for symbol in word:
            mandarin[symbol] = 1

    # Emit the English section first, then the Mandarin one, one UTF-8
    # encoded entry per line.
    print('<English>')
    for entry in english:
        print(entry.encode('utf-8'))
    print('<Mandarin>')
    for entry in mandarin:
        print(entry.encode('utf-8'))
Пример #3
0
# -*- coding: utf-8 -*-

from Accuracy import is_chinese
import re
import sys

if __name__ == '__main__':
    # Build two key-only dictionaries from stdin (the value is always 1):
    # 'ch' collects single characters taken from words that is_chinese()
    # accepts, 'en' collects all remaining words unchanged.
    cluster = dict(ch={}, en={})
    for line in sys.stdin:
        decoded = line.decode('utf-8')
        first_space = decoded.find(' ')
        # NOTE(review): the end index is first_space - 1, so the character
        # right before the first space is excluded (and a line without any
        # space loses its last two characters because find() gives -1).
        # Verify against the real input layout.
        word = decoded[0:first_space - 1]
        if is_chinese(word):
            for char in word:
                cluster['ch'][char] = 1
        else:
            cluster['en'][word] = 1

    # Print each section header followed by its UTF-8 encoded entries;
    # English comes first, Mandarin second.
    for header, bucket in (('<English>', 'en'), ('<Mandarin>', 'ch')):
        print(header)
        for token in cluster[bucket]:
            print(token.encode('utf-8'))